From 16abce880939b746d5604412f7cbcfe8254e08cf Mon Sep 17 00:00:00 2001 From: Konstantin Demin Date: Wed, 18 Jun 2025 10:38:23 +0300 Subject: [PATCH] release 6.15.2 (preliminary) --- debian/bin/{genpatch-pfkernel => genpatch-pf} | 20 +- debian/bin/genpatch-zen | 60 + debian/changelog | 88 +- debian/config/amd64/config.cloud | 41 +- debian/config/amd64/config.mobile | 127 +- debian/config/amd64/config.vm | 80 +- debian/config/config | 122 +- .../all/disable-some-marvell-phys.patch | 10 +- ...ve-source-paths-in-abi-documentation.patch | 32 - .../kbuild-fix-recordmcount-dependency.patch | 2 +- ...cflags-through-to-libbpf-build-again.patch | 2 +- ...ove-bpf-run-time-check-at-build-time.patch | 2 +- ...ix-missing-ldflags-for-some-programs.patch | 2 +- ...nprivileged-CLONE_NEWUSER-by-default.patch | 18 +- ...uilding-ashmem-and-binder-as-modules.patch | 2 +- ...as-mitigation-against-local-exploits.patch | 4 +- ...rt-symbols-needed-by-android-drivers.patch | 4 +- ...n-use-of-fanotify_access_permissions.patch | 2 +- ...ect-loading-failures-as-info-for-d-i.patch | 37 + ...ink-security-restrictions-by-default.patch | 2 +- debian/patches/debian/kernelvariables.patch | 4 +- ...ing-source-filenames-from-executable.patch | 2 +- ...compiler-version-comparison-optional.patch | 2 +- .../tools-perf-install-python-bindings.patch | 4 +- ...tools-perf-perf-read-vdso-in-libexec.patch | 2 +- .../debian/uname-version-timestamp.patch | 2 +- .../debian/yama-disable-by-default.patch | 2 +- ...ecure_boot-flag-to-indicate-secure-b.patch | 2 +- ...e-kernel-if-booted-in-secure-boot-mo.patch | 4 +- ...rther-restriction-of-perf_event_open.patch | 22 +- ...ption-to-exclude-integrated-gpu-only.patch | 6 +- ...make-x32-syscall-support-conditional.patch | 142 +- ...ail-the-kernel-build-on-fatal-errors.patch | 52 - ...t.patch => 0001-established-timeout.patch} | 0 ...cal-ports.patch => 0002-local-ports.patch} | 0 ...patch => 0003-bridge-group_fwd_mask.patch} | 0 ...s-genkey.patch => 0004-certs-genkey.patch} | 0 ...raysky2-more-ISA-levels-and-uarches.patch} | 752 +- ...e-CONFIG_OPTIMIZE_FOR_PERFORMANCE_O3.patch | 6 +- ...ONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3.patch | 2 +- ...-Prevent-generating-avx2-and-avx512-.patch | 20 +- ...ust-KBUILD_CFLAGS-fno-tree-vectorize.patch | 6 +- ...d-GCC-SMS-based-modulo-scheduling-fl.patch | 7 +- ...te-Remove-the-redundant-des_perf-cla.patch | 27 - ...tate-Modularize-perf-freq-conversion.patch | 133 - ...te-Remove-the-unnecessary-cpufreq_up.patch | 37 - ...te-Use-scope-based-cleanup-for-cpufr.patch | 124 - ...te-Remove-the-unncecessary-driver_lo.patch | 26 - ...tate-Fix-the-clamping-of-perf-values.patch | 35 - ...te-Show-a-warning-when-a-CPU-fails-t.patch | 35 - ...te-Drop-min-and-max-cached-frequenci.patch | 209 - ...pstate-Move-perf-values-into-a-union.patch | 611 - ...-cpufreq-amd-pstate-Overhaul-locking.patch | 81 - ...req-amd-pstate-Drop-cppc_cap1_cached.patch | 48 - ...te-ut-Use-_free-macro-to-free-put-po.patch | 144 - ...te-ut-Allow-lowest-nonlinear-and-low.patch | 37 - ...state-ut-Drop-SUCCESS-and-FAIL-enums.patch | 309 - ...te-ut-Run-on-all-of-the-correct-CPUs.patch | 50 - ...-amd-pstate-ut-Adjust-variable-scope.patch | 42 - ...te-Replace-all-AMD_CPPC_-macros-with.patch | 123 - ...te-Cache-CPPC-request-in-shared-mem-.patch | 60 - ...te-Move-all-EPP-tracing-into-_update.patch | 318 - ...te-Update-cppc_req_cached-for-shared.patch | 37 - ...te-Drop-debug-statements-for-policy-.patch | 38 - ...freq-amd-pstate-Rework-CPPC-enabling.patch | 327 - ...-cpufreq-amd-pstate-Stop-caching-EPP.patch | 105 
- ...te-Drop-actions-in-amd_pstate_epp_cp.patch | 39 - ...te-fix-warning-noticed-by-kernel-tes.patch | 41 - ...te-Fix-min_limit-perf-and-freq-updat.patch | 42 - ...puidle-Prefer-teo-over-menu-governor.patch | 4 +- ...ts-make-the-fast-path-64-bit-specifi.patch | 65 - ...tr-rewrite-AESNI-AVX-optimized-CTR-a.patch | 1857 -- ...t-to-check-the-longest-symbol-length.patch | 176 - ..._pages-update-error-in-dirty_ratio_h.patch | 70 + ...-for-vc_origin-address-range-in-vgac.patch | 179 + ...uplicate-unlikely-definition-in-insn.patch | 36 - ...ister_framebuffer-to-prevent-null-pt.patch | 102 + ..._var-to-prevent-null-ptr-deref-in-fb.patch | 65 + ...t-scan-before-removing-link-interfac.patch | 40 - ...tch-CONFIG_SYSFS_SYCALL-default-to-n.patch | 49 - ...n_inode-use-a-proper-mode-internally.patch | 113 + ...-anon_inode-explicitly-block-setattr.patch | 80 + ...ode-raise-SB_I_NODEV-and-SB_I_NOEXEC.patch | 39 + .../fixes/0008-fs-add-S_ANON_INODE.patch | 136 + ...override-creating-attribute-file-fai.patch | 35 + ...propagate-mounts-into-detached-trees.patch | 104 + ...dropbehind-invalidate-on-folio-dirty.patch | 51 + ...ilemap_end_dropbehind-for-read-inval.patch | 51 + ...le-FOP_DONTCACHE-for-now-due-to-bugs.patch | 29 + ...p-unify-read-write-dropbehind-naming.patch | 36 + ...-dropbehind-flag-testing-and-clearin.patch | 78 + ...x-race-with-folio-split-free-using-t.patch | 98 + ...ected_ref_count-for-reference-count-.patch | 198 + ...te-be-overwritten-when-expanding-vma.patch | 129 + ...re-page-tables-during-VMA-split-not-.patch | 217 + ...ix-huge_pmd_unshare-vs-GUP-fast-race.patch | 50 + ...e-madvise_lock-failure-during-race-u.patch | 48 + ...o-Relocate-framebuffers-behind-PCI-b.patch | 164 + ...b-Fix-screen_info-type-check-for-VGA.patch | 86 + ...l-Cure-TIF_IO_BITMAP-inconsistencies.patch | 113 + ...chdog-may-detect-false-positive-of-s.patch | 200 + ...26-sched-rt-Fix-race-in-push_rt_task.patch | 288 + ...r-Adhere-to-place_entity-constraints.patch | 62 + ...-module-codetag-load-errors-as-modul.patch | 184 + ...er-the-device-if-svc_rdma_accept-fai.patch | 29 + ...hang-on-NFS-mount-with-xprtsec-m-tls.patch | 53 + ...tential-deadlock-in-netvsc_vf_setxdp.patch | 89 + ...r-the-dst-when-changing-skb-protocol.patch | 113 + ...ch_sfq-reject-invalid-perturb-period.patch | 67 + ...-fix-race-between-handle_posix_cpu_t.patch | 51 + ...-iterator-on-commit_merge-OOM-failur.patch | 93 + ...ical-race-where-stale-TLB-entries-co.patch | 90 + ...n-t-truncate-end-buffer-for-multiple.patch | 33 + ...-polled-uring_cmd-end_io-work-to-tas.patch | 54 + ...EMULATES_ZONE_APPEND-flag-on-BIO-com.patch | 33 + ...equest-list-tail-for-one-shot-backme.patch | 65 + ...ove-pv_ops.mmu.tlb_remove_table-call.patch | 89 - ...lidate-full-flush-threshold-decision.patch | 87 - ...dd-INVLPGB-feature-and-Kconfig-entry.patch | 103 - ...0004-x86-mm-Add-INVLPGB-support-code.patch | 170 - ...m-Use-INVLPGB-for-kernel-TLB-flushes.patch | 97 - ...oadcast-TLB-flushing-in-page-reclaim.patch | 32 - ...bal-ASID-allocation-helper-functions.patch | 286 - ...obal-ASID-context-switch-and-TLB-flu.patch | 219 - ...Add-global-ASID-process-exit-helpers.patch | 88 - ...oadcast-TLB-invalidation-for-multi-t.patch | 219 - ...ble-AMD-translation-cache-extensions.patch | 83 - ...t-the-ASID-valid-bit-for-the-INVLPGB.patch | 121 - ...roadcast-flush-from-reclaim-if-pages.patch | 70 - ...-window-where-TLB-flushes-may-be-ina.patch | 92 - ...ice_write_prof_mask_reg-as-noinline.patch} | 2 +- ...11-mark-copy_mesh_setup-as-noinline.patch} | 2 +- 
...filesystem-in-case-genl_register_fam.patch | 39 + ...tween-nfsd-registration-and-exports_.patch | 162 + ...checking-for-NLM-under-XPRTSEC-polic.patch | 35 + ...ust_allow-must-check-this-is-a-v4-co.patch | 32 + ...ssc-before-laundromat_work-to-preven.patch | 47 + ...ement-FATTR4_CLONE_BLKSIZE-attribute.patch | 62 + ...double-unlock-bug-in-nfs_return_empt.patch | 65 + ...eck-for-OPEN-feature-support-in-v4.1.patch | 32 + ...e-for-LOCALIO-support-asynchronously.patch | 96 + ...add-NULL-check-in-automount_fullpath.patch | 29 + ...ctions-for-all-channels-when-reconne.patch | 39 + ...ddr-whenever-channel-iface-is-update.patch | 31 + ...ion-is-needed-only-for-primary-chann.patch | 33 + ...he-channel-loading-lag-while-picking.patch | 73 + ...ther-channels-when-query-server-inte.patch | 82 + ...disable-interface-polling-on-failure.patch | 64 + ...ctory-cache-reuse-for-readdir-operat.patch | 148 + ...-perags-are-initialised-when-trimmin.patch | 81 + .../0001-zstd-import-upstream-v1.5.7.patch | 23402 ---------------- ...efactor-intentional-wrap-around-test.patch | 58 - .../binder/0001-binder-turn-into-module.patch | 10 +- ...cept-in-LIFO-order-for-cache-efficie.patch | 8 +- ...re-Enable-stateless-firmware-loading.patch | 4 +- .../0003-locking-rwsem-spin-faster.patch | 4 +- ...ivers-initialize-ata-before-graphics.patch | 4 +- ...netfilter-nf_tables-fullcone-support.patch | 8 +- ...-netfilter-add-xt_FLOWOFFLOAD-target.patch | 4 +- ...den-app-limited-rate-sample-detectio.patch | 10 +- ...hrink-delivered_mstamp-first_tx_msta.patch | 8 +- ...napshot-packets-in-flight-at-transmi.patch | 12 +- ...ount-packets-lost-over-TCP-rate-samp.patch | 8 +- ...xport-FLAG_ECE-in-rate_sample.is_ece.patch | 8 +- ...ntroduce-ca_ops-skb_marked_lost-CC-m.patch | 8 +- ...djust-skb-tx.in_flight-upon-merge-in.patch | 6 +- ...djust-skb-tx.in_flight-upon-split-in.patch | 12 +- ...ca-opts-flag-TCP_CONG_WANTS_CE_EVENT.patch | 18 +- ...alize-TSO-sizing-in-TCP-CC-module-AP.patch | 8 +- ..._ack_mode-1-skip-rwin-check-in-tcp_f.patch | 14 +- ...ecord-app-limited-status-of-TLP-repa.patch | 8 +- ...nform-CC-module-of-losses-repaired-b.patch | 8 +- ...ntroduce-is_acking_tlp_retrans_seq-i.patch | 14 +- ...r-route-feature-RTAX_FEATURE_ECN_LOW.patch | 26 +- ...pdate-TCP-bbr-congestion-control-mod.patch | 94 +- ...nsure-ECN-enabled-BBR-flows-set-ECT-.patch | 20 +- ...OPT_ECN_LOW-in-tcp_info-tcpi_options.patch | 8 +- ...-to-skip-tcp-collapse-processing-whe.patch | 14 +- ...errides-for-missing-ACS-capabilities.patch | 4 +- ...001-extcon-Add-driver-for-Steam-Deck.patch | 4 +- ...d-driver-for-Steam-Deck-s-EC-sensors.patch | 8 +- ...hwmon-Add-support-for-max-battery-le.patch | 4 +- ...mdeck-Add-support-for-Steam-Deck-LED.patch | 8 +- ...d-Add-MFD-core-driver-for-Steam-Deck.patch | 8 +- ...pose-controller-board-power-in-sysfs.patch | 4 +- ...onfig-file-required-to-sign-external.patch | 4 +- ...emove-GCC-minimal-function-alignment.patch | 8 +- ...scheduler-tunable-latencies-to-unsca.patch | 4 +- ...-yield_type-sysctl-to-reduce-or-disa.patch | 17 +- ...deadline-Increase-write-priority-to-.patch | 4 +- ...deadline-Disable-front_merges-by-def.patch | 4 +- ...-rq_affinity-to-force-complete-I-O-r.patch | 6 +- ...et-wbt_default_latency_nsec-to-2msec.patch | 6 +- ...dd-500Hz-timer-interrupt-kernel-conf.patch | 4 +- ...che_pressure-50-decreases-the-rate-a.patch | 12 +- ...mm-Raise-max_map_count-default-value.patch | 8 +- ...mm-vmscan-Reduce-amount-of-swapping.patch} | 8 +- ...ogroup-Add-kernel-parameter-and-conf.patch | 10 +- 
...unes-ondemand-and-conservative-gover.patch | 4 +- ...ig.debug-disable-default-SYMBOLIC_ER.patch | 8 +- ...etlocalversion-remove-tag-for-git-re.patch | 4 +- ...etlocalversion-Move-localversion-fil.patch | 4 +- ...-skip-simpledrm-if-nvidia-drm.modese.patch | 4 +- ...ent-Fix-not-using-key-encryption-siz.patch | 191 - ...rs-notice-when-running-old-Intel-mic.patch | 471 + ...per-process-KSM-control-via-syscalls.patch | 398 - .../sauce/0001-ZEN-Add-VHBA-driver.patch | 18 +- ...BA-fix-building-with-kernel-6.14-rc1.patch | 28 - ...-Intel-remapped-NVMe-device-support.patch} | 4 +- ...-Disable-stack-conservation-for-GCC.patch} | 4 +- ...-use-call_rcu-when-detaching-client.patch} | 2 +- ...e-schedutil-dependency-on-Intel-AMD.patch} | 2 +- ...l-pstate-Implement-enable-parameter.patch} | 6 +- ...-Allow-override-of-min_power_limit-.patch} | 14 +- ...d-early-when-nothing-s-waiting-for-.patch} | 26 +- ...Disable-staggered-spinup-by-default.patch} | 2 +- ...g.preempt-Remove-EXPERT-conditional.patch} | 2 +- ...11-ZEN-INTERACTIVE-Base-config-item.patch} | 4 +- ...se-BFQ-as-the-elevator-for-SQ-devic.patch} | 4 +- ...se-Kyber-as-the-elevator-for-MQ-dev.patch} | 4 +- ...nable-background-reclaim-of-hugepag.patch} | 4 +- ...ACTIVE-Tune-EEVDF-for-interactivity.patch} | 14 +- ...une-ondemand-governor-for-interacti.patch} | 4 +- ...E-mm-Disable-unevictable-compaction.patch} | 6 +- ...m-Disable-watermark-boosting-by-def.patch} | 8 +- ...m-Lower-the-non-hugetlbpage-pageblo.patch} | 4 +- ...m-crypt-Disable-workqueues-for-cryp.patch} | 6 +- ...E-mm-swap-Disable-swap-in-readahead.patch} | 10 +- ...TIVE-Document-PDS-BMQ-configuration.patch} | 4 +- debian/patches/series | 174 +- 230 files changed, 6762 insertions(+), 32303 deletions(-) rename debian/bin/{genpatch-pfkernel => genpatch-pf} (70%) create mode 100755 debian/bin/genpatch-zen delete mode 100644 debian/patches/bugfix/all/documentation-use-relative-source-paths-in-abi-documentation.patch create mode 100644 debian/patches/debian/firmware_loader-log-direct-loading-failures-as-info-for-d-i.patch delete mode 100644 debian/patches/krd/0001-Revert-objtool-dont-fail-the-kernel-build-on-fatal-errors.patch rename debian/patches/krd/{0002-established-timeout.patch => 0001-established-timeout.patch} (100%) rename debian/patches/krd/{0003-local-ports.patch => 0002-local-ports.patch} (100%) rename debian/patches/krd/{0004-bridge-group_fwd_mask.patch => 0003-bridge-group_fwd_mask.patch} (100%) rename debian/patches/krd/{0005-certs-genkey.patch => 0004-certs-genkey.patch} (100%) rename debian/patches/mixed-arch/{0001-ZEN-Add-graysky-s-more-ISA-levels-and-uarches.patch => 0001-graysky2-more-ISA-levels-and-uarches.patch} (55%) delete mode 100644 debian/patches/patchset-pf/amd-pstate/0001-cpufreq-amd-pstate-Remove-the-redundant-des_perf-cla.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0002-cpufreq-amd-pstate-Modularize-perf-freq-conversion.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0003-cpufreq-amd-pstate-Remove-the-unnecessary-cpufreq_up.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0004-cpufreq-amd-pstate-Use-scope-based-cleanup-for-cpufr.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0005-cpufreq-amd-pstate-Remove-the-unncecessary-driver_lo.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0006-cpufreq-amd-pstate-Fix-the-clamping-of-perf-values.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0007-cpufreq-amd-pstate-Show-a-warning-when-a-CPU-fails-t.patch delete mode 100644 
debian/patches/patchset-pf/amd-pstate/0008-cpufreq-amd-pstate-Drop-min-and-max-cached-frequenci.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0009-cpufreq-amd-pstate-Move-perf-values-into-a-union.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0010-cpufreq-amd-pstate-Overhaul-locking.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0011-cpufreq-amd-pstate-Drop-cppc_cap1_cached.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0012-cpufreq-amd-pstate-ut-Use-_free-macro-to-free-put-po.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0013-cpufreq-amd-pstate-ut-Allow-lowest-nonlinear-and-low.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0014-cpufreq-amd-pstate-ut-Drop-SUCCESS-and-FAIL-enums.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0015-cpufreq-amd-pstate-ut-Run-on-all-of-the-correct-CPUs.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0016-cpufreq-amd-pstate-ut-Adjust-variable-scope.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0017-cpufreq-amd-pstate-Replace-all-AMD_CPPC_-macros-with.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0018-cpufreq-amd-pstate-Cache-CPPC-request-in-shared-mem-.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0019-cpufreq-amd-pstate-Move-all-EPP-tracing-into-_update.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0020-cpufreq-amd-pstate-Update-cppc_req_cached-for-shared.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0021-cpufreq-amd-pstate-Drop-debug-statements-for-policy-.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0022-cpufreq-amd-pstate-Rework-CPPC-enabling.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0023-cpufreq-amd-pstate-Stop-caching-EPP.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0024-cpufreq-amd-pstate-Drop-actions-in-amd_pstate_epp_cp.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0025-cpufreq-amd-pstate-fix-warning-noticed-by-kernel-tes.patch delete mode 100644 debian/patches/patchset-pf/amd-pstate/0026-cpufreq-amd-pstate-Fix-min_limit-perf-and-freq-updat.patch delete mode 100644 debian/patches/patchset-pf/crypto/0001-crypto-x86-aes-xts-make-the-fast-path-64-bit-specifi.patch delete mode 100644 debian/patches/patchset-pf/crypto/0002-crypto-x86-aes-ctr-rewrite-AESNI-AVX-optimized-CTR-a.patch delete mode 100644 debian/patches/patchset-pf/fixes/0001-Kunit-to-check-the-longest-symbol-length.patch create mode 100644 debian/patches/patchset-pf/fixes/0001-mm-fix-ratelimit_pages-update-error-in-dirty_ratio_h.patch create mode 100644 debian/patches/patchset-pf/fixes/0002-vgacon-Add-check-for-vc_origin-address-range-in-vgac.patch delete mode 100644 debian/patches/patchset-pf/fixes/0002-x86-tools-Drop-duplicate-unlikely-definition-in-insn.patch create mode 100644 debian/patches/patchset-pf/fixes/0003-fbdev-Fix-do_register_framebuffer-to-prevent-null-pt.patch create mode 100644 debian/patches/patchset-pf/fixes/0004-fbdev-Fix-fb_set_var-to-prevent-null-ptr-deref-in-fb.patch delete mode 100644 debian/patches/patchset-pf/fixes/0004-wifi-ath12k-Abort-scan-before-removing-link-interfac.patch delete mode 100644 debian/patches/patchset-pf/fixes/0005-Kconfig-switch-CONFIG_SYSFS_SYCALL-default-to-n.patch create mode 100644 debian/patches/patchset-pf/fixes/0005-anon_inode-use-a-proper-mode-internally.patch create mode 100644 debian/patches/patchset-pf/fixes/0006-anon_inode-explicitly-block-setattr.patch 
create mode 100644 debian/patches/patchset-pf/fixes/0007-anon_inode-raise-SB_I_NODEV-and-SB_I_NOEXEC.patch create mode 100644 debian/patches/patchset-pf/fixes/0008-fs-add-S_ANON_INODE.patch create mode 100644 debian/patches/patchset-pf/fixes/0009-configfs-Do-not-override-creating-attribute-file-fai.patch create mode 100644 debian/patches/patchset-pf/fixes/0010-Don-t-propagate-mounts-into-detached-trees.patch create mode 100644 debian/patches/patchset-pf/fixes/0011-mm-filemap-gate-dropbehind-invalidate-on-folio-dirty.patch create mode 100644 debian/patches/patchset-pf/fixes/0012-mm-filemap-use-filemap_end_dropbehind-for-read-inval.patch create mode 100644 debian/patches/patchset-pf/fixes/0013-Revert-Disable-FOP_DONTCACHE-for-now-due-to-bugs.patch create mode 100644 debian/patches/patchset-pf/fixes/0014-mm-filemap-unify-read-write-dropbehind-naming.patch create mode 100644 debian/patches/patchset-pf/fixes/0015-mm-filemap-unify-dropbehind-flag-testing-and-clearin.patch create mode 100644 debian/patches/patchset-pf/fixes/0016-mm-khugepaged-fix-race-with-folio-split-free-using-t.patch create mode 100644 debian/patches/patchset-pf/fixes/0017-mm-add-folio_expected_ref_count-for-reference-count-.patch create mode 100644 debian/patches/patchset-pf/fixes/0018-mm-fix-uprobe-pte-be-overwritten-when-expanding-vma.patch create mode 100644 debian/patches/patchset-pf/fixes/0019-mm-hugetlb-unshare-page-tables-during-VMA-split-not-.patch create mode 100644 debian/patches/patchset-pf/fixes/0020-mm-hugetlb-fix-huge_pmd_unshare-vs-GUP-fast-race.patch create mode 100644 debian/patches/patchset-pf/fixes/0021-mm-madvise-handle-madvise_lock-failure-during-race-u.patch create mode 100644 debian/patches/patchset-pf/fixes/0022-video-screen_info-Relocate-framebuffers-behind-PCI-b.patch create mode 100644 debian/patches/patchset-pf/fixes/0023-sysfb-Fix-screen_info-type-check-for-VGA.patch create mode 100644 debian/patches/patchset-pf/fixes/0024-x86-iopl-Cure-TIF_IO_BITMAP-inconsistencies.patch create mode 100644 debian/patches/patchset-pf/fixes/0025-watchdog-fix-watchdog-may-detect-false-positive-of-s.patch create mode 100644 debian/patches/patchset-pf/fixes/0026-sched-rt-Fix-race-in-push_rt_task.patch create mode 100644 debian/patches/patchset-pf/fixes/0027-sched-fair-Adhere-to-place_entity-constraints.patch create mode 100644 debian/patches/patchset-pf/fixes/0028-alloc_tag-handle-module-codetag-load-errors-as-modul.patch create mode 100644 debian/patches/patchset-pf/fixes/0029-svcrdma-Unregister-the-device-if-svc_rdma_accept-fai.patch create mode 100644 debian/patches/patchset-pf/fixes/0030-SUNRPC-Prevent-hang-on-NFS-mount-with-xprtsec-m-tls.patch create mode 100644 debian/patches/patchset-pf/fixes/0031-hv_netvsc-fix-potential-deadlock-in-netvsc_vf_setxdp.patch create mode 100644 debian/patches/patchset-pf/fixes/0032-net-clear-the-dst-when-changing-skb-protocol.patch create mode 100644 debian/patches/patchset-pf/fixes/0033-net_sched-sch_sfq-reject-invalid-perturb-period.patch create mode 100644 debian/patches/patchset-pf/fixes/0034-posix-cpu-timers-fix-race-between-handle_posix_cpu_t.patch create mode 100644 debian/patches/patchset-pf/fixes/0035-mm-vma-reset-VMA-iterator-on-commit_merge-OOM-failur.patch create mode 100644 debian/patches/patchset-pf/fixes/0036-mm-close-theoretical-race-where-stale-TLB-entries-co.patch create mode 100644 debian/patches/patchset-pf/fixes/0037-io_uring-kbuf-don-t-truncate-end-buffer-for-multiple.patch create mode 100644 
debian/patches/patchset-pf/fixes/0038-nvme-always-punt-polled-uring_cmd-end_io-work-to-tas.patch create mode 100644 debian/patches/patchset-pf/fixes/0039-block-Clear-BIO_EMULATES_ZONE_APPEND-flag-on-BIO-com.patch create mode 100644 debian/patches/patchset-pf/fixes/0040-block-use-plug-request-list-tail-for-one-shot-backme.patch delete mode 100644 debian/patches/patchset-pf/invlpgb/0001-x86-mm-Remove-pv_ops.mmu.tlb_remove_table-call.patch delete mode 100644 debian/patches/patchset-pf/invlpgb/0002-x86-mm-Consolidate-full-flush-threshold-decision.patch delete mode 100644 debian/patches/patchset-pf/invlpgb/0003-x86-mm-Add-INVLPGB-feature-and-Kconfig-entry.patch delete mode 100644 debian/patches/patchset-pf/invlpgb/0004-x86-mm-Add-INVLPGB-support-code.patch delete mode 100644 debian/patches/patchset-pf/invlpgb/0005-x86-mm-Use-INVLPGB-for-kernel-TLB-flushes.patch delete mode 100644 debian/patches/patchset-pf/invlpgb/0006-x86-mm-Use-broadcast-TLB-flushing-in-page-reclaim.patch delete mode 100644 debian/patches/patchset-pf/invlpgb/0007-x86-mm-Add-global-ASID-allocation-helper-functions.patch delete mode 100644 debian/patches/patchset-pf/invlpgb/0008-x86-mm-Handle-global-ASID-context-switch-and-TLB-flu.patch delete mode 100644 debian/patches/patchset-pf/invlpgb/0009-x86-mm-Add-global-ASID-process-exit-helpers.patch delete mode 100644 debian/patches/patchset-pf/invlpgb/0010-x86-mm-Enable-broadcast-TLB-invalidation-for-multi-t.patch delete mode 100644 debian/patches/patchset-pf/invlpgb/0011-x86-mm-Enable-AMD-translation-cache-extensions.patch delete mode 100644 debian/patches/patchset-pf/invlpgb/0012-x86-mm-Always-set-the-ASID-valid-bit-for-the-INVLPGB.patch delete mode 100644 debian/patches/patchset-pf/invlpgb/0013-x86-mm-Only-do-broadcast-flush-from-reclaim-if-pages.patch delete mode 100644 debian/patches/patchset-pf/invlpgb/0014-x86-mm-Eliminate-window-where-TLB-flushes-may-be-ina.patch rename debian/patches/patchset-pf/{fixes/0003-ice-mark-ice_write_prof_mask_reg-as-noinline.patch => kbuild/0001-ice-mark-ice_write_prof_mask_reg-as-noinline.patch} (95%) rename debian/patches/patchset-pf/{fixes/0006-wifi-mac80211-mark-copy_mesh_setup-as-noinline.patch => kbuild/0002-wifi-mac80211-mark-copy_mesh_setup-as-noinline.patch} (95%) create mode 100644 debian/patches/patchset-pf/nfs/0001-NFSD-unregister-filesystem-in-case-genl_register_fam.patch create mode 100644 debian/patches/patchset-pf/nfs/0002-NFSD-fix-race-between-nfsd-registration-and-exports_.patch create mode 100644 debian/patches/patchset-pf/nfs/0003-nfsd-fix-access-checking-for-NLM-under-XPRTSEC-polic.patch create mode 100644 debian/patches/patchset-pf/nfs/0004-nfsd-nfsd4_spo_must_allow-must-check-this-is-a-v4-co.patch create mode 100644 debian/patches/patchset-pf/nfs/0005-nfsd-Initialize-ssc-before-laundromat_work-to-preven.patch create mode 100644 debian/patches/patchset-pf/nfs/0006-NFSD-Implement-FATTR4_CLONE_BLKSIZE-attribute.patch create mode 100644 debian/patches/patchset-pf/nfs/0007-fs-nfs-read-fix-double-unlock-bug-in-nfs_return_empt.patch create mode 100644 debian/patches/patchset-pf/nfs/0008-NFSv4-Don-t-check-for-OPEN-feature-support-in-v4.1.patch create mode 100644 debian/patches/patchset-pf/nfs/0009-NFS-always-probe-for-LOCALIO-support-asynchronously.patch create mode 100644 debian/patches/patchset-pf/smb/0001-smb-client-add-NULL-check-in-automount_fullpath.patch create mode 100644 debian/patches/patchset-pf/smb/0002-cifs-reset-connections-for-all-channels-when-reconne.patch create mode 100644 
debian/patches/patchset-pf/smb/0003-cifs-update-dstaddr-whenever-channel-iface-is-update.patch create mode 100644 debian/patches/patchset-pf/smb/0004-cifs-dns-resolution-is-needed-only-for-primary-chann.patch create mode 100644 debian/patches/patchset-pf/smb/0005-cifs-deal-with-the-channel-loading-lag-while-picking.patch create mode 100644 debian/patches/patchset-pf/smb/0006-cifs-serialize-other-channels-when-query-server-inte.patch create mode 100644 debian/patches/patchset-pf/smb/0007-cifs-do-not-disable-interface-polling-on-failure.patch create mode 100644 debian/patches/patchset-pf/smb/0008-smb-improve-directory-cache-reuse-for-readdir-operat.patch create mode 100644 debian/patches/patchset-pf/xfs/0001-xfs-don-t-assume-perags-are-initialised-when-trimmin.patch delete mode 100644 debian/patches/patchset-pf/zstd/0001-zstd-import-upstream-v1.5.7.patch delete mode 100644 debian/patches/patchset-pf/zstd/0002-lib-zstd-Refactor-intentional-wrap-around-test.patch rename debian/patches/patchset-xanmod/xanmod/{0012-XANMOD-mm-vmscan-Set-minimum-amount-of-swapping.patch => 0012-XANMOD-mm-vmscan-Reduce-amount-of-swapping.patch} (65%) delete mode 100644 debian/patches/patchset-zen/fixes/0002-Bluetooth-hci_event-Fix-not-using-key-encryption-siz.patch create mode 100644 debian/patches/patchset-zen/fixes/0002-x86-cpu-Help-users-notice-when-running-old-Intel-mic.patch delete mode 100644 debian/patches/patchset-zen/ksm/0001-mm-expose-per-process-KSM-control-via-syscalls.patch delete mode 100644 debian/patches/patchset-zen/sauce/0002-VHBA-fix-building-with-kernel-6.14-rc1.patch rename debian/patches/patchset-zen/sauce/{0003-ZEN-PCI-Add-Intel-remapped-NVMe-device-support.patch => 0002-ZEN-PCI-Add-Intel-remapped-NVMe-device-support.patch} (99%) rename debian/patches/patchset-zen/sauce/{0004-ZEN-Disable-stack-conservation-for-GCC.patch => 0003-ZEN-Disable-stack-conservation-for-GCC.patch} (86%) rename debian/patches/patchset-zen/sauce/{0005-ZEN-Input-evdev-use-call_rcu-when-detaching-client.patch => 0004-ZEN-Input-evdev-use-call_rcu-when-detaching-client.patch} (97%) rename debian/patches/patchset-zen/sauce/{0006-ZEN-cpufreq-Remove-schedutil-dependency-on-Intel-AMD.patch => 0005-ZEN-cpufreq-Remove-schedutil-dependency-on-Intel-AMD.patch} (94%) rename debian/patches/patchset-zen/sauce/{0007-ZEN-intel-pstate-Implement-enable-parameter.patch => 0006-ZEN-intel-pstate-Implement-enable-parameter.patch} (93%) rename debian/patches/patchset-zen/sauce/{0008-ZEN-drm-amdgpu-pm-Allow-override-of-min_power_limit-.patch => 0007-ZEN-drm-amdgpu-pm-Allow-override-of-min_power_limit-.patch} (87%) rename debian/patches/patchset-zen/sauce/{0009-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch => 0008-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch} (88%) rename debian/patches/patchset-zen/sauce/{0010-ZEN-ahci-Disable-staggered-spinup-by-default.patch => 0009-ZEN-ahci-Disable-staggered-spinup-by-default.patch} (93%) rename debian/patches/patchset-zen/sauce/{0011-ZEN-kernel-Kconfig.preempt-Remove-EXPERT-conditional.patch => 0010-ZEN-kernel-Kconfig.preempt-Remove-EXPERT-conditional.patch} (91%) rename debian/patches/patchset-zen/sauce/{0012-ZEN-INTERACTIVE-Base-config-item.patch => 0011-ZEN-INTERACTIVE-Base-config-item.patch} (80%) rename debian/patches/patchset-zen/sauce/{0013-ZEN-INTERACTIVE-Use-BFQ-as-the-elevator-for-SQ-devic.patch => 0012-ZEN-INTERACTIVE-Use-BFQ-as-the-elevator-for-SQ-devic.patch} (89%) rename 
debian/patches/patchset-zen/sauce/{0014-ZEN-INTERACTIVE-Use-Kyber-as-the-elevator-for-MQ-dev.patch => 0013-ZEN-INTERACTIVE-Use-Kyber-as-the-elevator-for-MQ-dev.patch} (90%) rename debian/patches/patchset-zen/sauce/{0015-ZEN-INTERACTIVE-Enable-background-reclaim-of-hugepag.patch => 0014-ZEN-INTERACTIVE-Enable-background-reclaim-of-hugepag.patch} (95%) rename debian/patches/patchset-zen/sauce/{0016-ZEN-INTERACTIVE-Tune-EEVDF-for-interactivity.patch => 0015-ZEN-INTERACTIVE-Tune-EEVDF-for-interactivity.patch} (88%) rename debian/patches/patchset-zen/sauce/{0017-ZEN-INTERACTIVE-Tune-ondemand-governor-for-interacti.patch => 0016-ZEN-INTERACTIVE-Tune-ondemand-governor-for-interacti.patch} (97%) rename debian/patches/patchset-zen/sauce/{0018-ZEN-INTERACTIVE-mm-Disable-unevictable-compaction.patch => 0017-ZEN-INTERACTIVE-mm-Disable-unevictable-compaction.patch} (85%) rename debian/patches/patchset-zen/sauce/{0019-ZEN-INTERACTIVE-mm-Disable-watermark-boosting-by-def.patch => 0018-ZEN-INTERACTIVE-mm-Disable-watermark-boosting-by-def.patch} (89%) rename debian/patches/patchset-zen/sauce/{0020-ZEN-INTERACTIVE-mm-Lower-the-non-hugetlbpage-pageblo.patch => 0019-ZEN-INTERACTIVE-mm-Lower-the-non-hugetlbpage-pageblo.patch} (95%) rename debian/patches/patchset-zen/sauce/{0021-ZEN-INTERACTIVE-dm-crypt-Disable-workqueues-for-cryp.patch => 0020-ZEN-INTERACTIVE-dm-crypt-Disable-workqueues-for-cryp.patch} (88%) rename debian/patches/patchset-zen/sauce/{0022-ZEN-INTERACTIVE-mm-swap-Disable-swap-in-readahead.patch => 0021-ZEN-INTERACTIVE-mm-swap-Disable-swap-in-readahead.patch} (84%) rename debian/patches/patchset-zen/sauce/{0023-ZEN-INTERACTIVE-Document-PDS-BMQ-configuration.patch => 0022-ZEN-INTERACTIVE-Document-PDS-BMQ-configuration.patch} (85%) diff --git a/debian/bin/genpatch-pfkernel b/debian/bin/genpatch-pf similarity index 70% rename from debian/bin/genpatch-pfkernel rename to debian/bin/genpatch-pf index b948e6c..773bb5c 100755 --- a/debian/bin/genpatch-pfkernel +++ b/debian/bin/genpatch-pf @@ -5,9 +5,9 @@ export GIT_OPTIONAL_LOCKS=0 w=$(git rev-parse --path-format=absolute --show-toplevel) ; : "${w:?}" ; cd "$w" -dst='debian/patches/pf-tmp' +dst='debian/patches/tmp-pf' src='../linux-extras' -branches='amd-pstate cpuidle crypto exfat fixes fuse invlpgb kbuild nfs smb zstd' +branches='fixes archlinux cpuidle kbuild nfs smb xfs' if [ -d "${dst}" ] ; then rm -rf "${dst}" ; fi mkdir -p "${dst}" @@ -34,19 +34,17 @@ for b in ${branches} ; do git switch --detach "${ref}" git switch -C "$r" + rm -rf "$w/${dst}/$b" ; mkdir -p "$w/${dst}/$b" if git rebase "${from}" ; then - [ -d "$w/${dst}/$b/" ] || mkdir -p "$w/${dst}/$b" - - set +e - env -C "$w" git ls-files -z | grep -zF "${dst}/$b/" | grep -zFv '/.' 
| env -C "$w" -u GIT_OPTIONAL_LOCKS xargs -r -0 git rm -f - find "$w/${dst}/$b/" -name '*.patch' -type f -exec rm -f {} + - set -e - git format-patch -N --subject-prefix='' --output-directory "$w/${dst}/$b" "${from}..$r" else echo >&2 git rebase --abort - echo >&2 + + touch "$w/${dst}/$b/0000-rebase-failed" + + base=$(git merge-base "${from}" "${ref}") + git format-patch -N --subject-prefix='' --output-directory "$w/${dst}/$b" "${base}..${ref}" fi git switch -q --detach "${ref}" @@ -57,6 +55,6 @@ done cd "$w" ; rm -rf "$t" echo >&2 -echo 'put in debian/patches/series' >&2 +echo 'output:' >&2 echo >&2 find "${dst}/" -type f -name '*.patch' | sed -E 's#^debian/patches/##' | sort -V diff --git a/debian/bin/genpatch-zen b/debian/bin/genpatch-zen new file mode 100755 index 0000000..a961d83 --- /dev/null +++ b/debian/bin/genpatch-zen @@ -0,0 +1,60 @@ +#!/bin/sh +set -ef + +export GIT_OPTIONAL_LOCKS=0 + +w=$(git rev-parse --path-format=absolute --show-toplevel) ; : "${w:?}" ; cd "$w" + +dst='debian/patches/tmp-zen' +src='../linux-extras' +branches='zen-sauce fixes' + +if [ -d "${dst}" ] ; then rm -rf "${dst}" ; fi +mkdir -p "${dst}" + +kver= +if [ -n "$1" ] ; then + kver="$1" +else + kver=$(dpkg-parsechangelog --show-field=Version | sed -E 's/^[0-9]+://;s/-[^-]*$//' | cut -d. -f1-2) +fi +from="upstream/linux-${kver}.y" + +t=$(mktemp -d) ; : "${t:?}" + +cp -ar "${src}" "$t/" +cd "$t/${src##*/}" + +git config advice.skippedCherryPicks false + +for b in ${branches} ; do + ref="zen/${kver}/$b" + r="tmp-rebase-$b" + + git switch --detach "${ref}" + git switch -C "$r" + + rm -rf "$w/${dst}/$b" ; mkdir -p "$w/${dst}/$b" + if git rebase "${from}" ; then + git format-patch -N --subject-prefix='' --output-directory "$w/${dst}/$b" "${from}..$r" + else + echo >&2 + git rebase --abort + + touch "$w/${dst}/$b/0000-rebase-failed" + + base=$(git merge-base "${from}" "${ref}") + git format-patch -N --subject-prefix='' --output-directory "$w/${dst}/$b" "${base}..${ref}" + fi + + git switch -q --detach "${ref}" + git branch -D "$r" + echo >&2 +done + +cd "$w" ; rm -rf "$t" + +echo >&2 +echo 'output:' >&2 +echo >&2 +find "${dst}/" -type f -name '*.patch' | sed -E 's#^debian/patches/##' | sort -V diff --git a/debian/changelog b/debian/changelog index 9a4e8d3..0b19e6d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,86 +1,8 @@ -linux (6.14.11-1) sid; urgency=medium +linux (6.15.2-1) sid; urgency=medium * New upstream stable update: - https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.11 + https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.15.1 + https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.15.2 + * New upstream release: https://kernelnewbies.org/Linux_6.15 - -- Konstantin Demin Tue, 10 Jun 2025 15:40:46 +0300 - -linux (6.14.10-1) sid; urgency=medium - - * New upstream stable update: - https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.10 - - -- Konstantin Demin Wed, 04 Jun 2025 16:09:02 +0300 - -linux (6.14.9-1) sid; urgency=medium - - * New upstream stable update: - https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.9 - - -- Konstantin Demin Thu, 29 May 2025 15:08:18 +0300 - -linux (6.14.8-1) sid; urgency=medium - - * New upstream stable update: - https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.8 - - -- Konstantin Demin Thu, 22 May 2025 17:02:41 +0300 - -linux (6.14.7-1) sid; urgency=medium - - * New upstream stable update: - https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.7 - - -- Konstantin Demin Sun, 18 May 2025 11:56:49 +0300 - -linux 
(6.14.6-1) sid; urgency=medium - - * New upstream stable update: - https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.6 - - -- Konstantin Demin Fri, 09 May 2025 12:23:42 +0300 - -linux (6.14.5-1) sid; urgency=medium - - * New upstream stable update: - https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.5 - - -- Konstantin Demin Fri, 02 May 2025 16:25:21 +0300 - -linux (6.14.4-1) sid; urgency=medium - - * New upstream stable update: - https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.4 - - -- Konstantin Demin Fri, 25 Apr 2025 20:05:32 +0300 - -linux (6.14.3-1) sid; urgency=medium - - * New upstream stable update: - https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.3 - - -- Konstantin Demin Mon, 21 Apr 2025 01:31:34 +0300 - -linux (6.14.2-1) sid; urgency=medium - - * New upstream stable update: - https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.2 - - -- Konstantin Demin Fri, 11 Apr 2025 00:21:57 +0300 - -linux (6.14.1-1) sid; urgency=medium - - * New upstream stable update: - https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.1 - - -- Konstantin Demin Mon, 07 Apr 2025 12:41:44 +0300 - -linux (6.14-1) sid; urgency=medium - - * Sync with Debian. - * Refresh patches. - * Refine configs. - * New upstream release: https://kernelnewbies.org/Linux_6.13 - * New upstream release: https://kernelnewbies.org/Linux_6.14 - - -- Konstantin Demin Thu, 27 Mar 2025 01:51:03 +0300 + -- Konstantin Demin Tue, 17 Jun 2025 12:18:45 +0300 diff --git a/debian/config/amd64/config.cloud b/debian/config/amd64/config.cloud index 299cc7e..5a2d7d1 100644 --- a/debian/config/amd64/config.cloud +++ b/debian/config/amd64/config.cloud @@ -92,8 +92,6 @@ CONFIG_IO_DELAY_NONE=y ## file: crypto/Kconfig ## CONFIG_CRYPTO_ECDH=m -CONFIG_CRYPTO_CTS=m -CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m @@ -305,6 +303,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y ## CONFIG_CXL_BUS=y # CONFIG_CXL_MEM_RAW_COMMANDS is not set +# CONFIG_CXL_FEATURES is not set ## ## file: drivers/devfreq/Kconfig @@ -397,6 +396,11 @@ CONFIG_GOOGLE_COREBOOT_TABLE=m CONFIG_GOOGLE_FRAMEBUFFER_COREBOOT=m CONFIG_GOOGLE_VPD=m +## +## file: drivers/fwctl/Kconfig +## +# CONFIG_FWCTL is not set + ## ## file: drivers/gnss/Kconfig ## @@ -437,6 +441,11 @@ CONFIG_GOOGLE_VPD=m ## # CONFIG_HTE is not set +## +## file: drivers/hv/Kconfig +## +# CONFIG_MSHV_ROOT is not set + ## ## file: drivers/hwmon/Kconfig ## @@ -1376,6 +1385,11 @@ CONFIG_PCIEASPM_DEFAULT=y # CONFIG_PCIEASPM_POWERSAVE is not set ## end choice +## +## file: drivers/pci/pwrctrl/Kconfig +## +# CONFIG_PCI_PWRCTL_SLOT is not set + ## ## file: drivers/pci/switch/Kconfig ## @@ -1465,6 +1479,11 @@ CONFIG_PCIEASPM_DEFAULT=y ## CONFIG_PPS=m +## +## file: drivers/pps/generators/Kconfig +## +# CONFIG_PPS_GENERATOR_TIO is not set + ## ## file: drivers/ptp/Kconfig ## @@ -1655,6 +1674,7 @@ CONFIG_SCSI_MPI3MR=m # CONFIG_SERIAL_8250_RSA is not set # CONFIG_SERIAL_8250_RT288X is not set # CONFIG_SERIAL_8250_MID is not set +# CONFIG_SERIAL_8250_NI is not set ## ## file: drivers/ufs/Kconfig @@ -1721,6 +1741,7 @@ CONFIG_WATCHDOG_PRETIMEOUT_GOV_NOOP=m CONFIG_WATCHDOG_PRETIMEOUT_GOV_PANIC=m CONFIG_SOFT_WATCHDOG_PRETIMEOUT=y # CONFIG_LENOVO_SE10_WDT is not set +# CONFIG_LENOVO_SE30_WDT is not set # CONFIG_XILINX_WATCHDOG is not set # CONFIG_CADENCE_WATCHDOG is not set # CONFIG_DW_WATCHDOG is not set @@ -1945,11 +1966,6 @@ CONFIG_PROC_VMCORE=y ## # CONFIG_CIFS is not set -## -## file: fs/sysv/Kconfig -## -# CONFIG_SYSV_FS is not set - ## ## 
file: fs/ufs/Kconfig ## @@ -2049,6 +2065,7 @@ CONFIG_PANIC_TIMEOUT=5 ## file: mm/Kconfig ## # CONFIG_ZSWAP is not set +CONFIG_ZSMALLOC=m # CONFIG_HWPOISON_INJECT is not set # CONFIG_NUMA_EMU is not set @@ -2310,6 +2327,7 @@ CONFIG_IPE_PROP_DM_VERITY_SIGNATURE=y ## file: security/keys/Kconfig ## # CONFIG_KEYS_REQUEST_CACHE is not set +# CONFIG_BIG_KEYS is not set # CONFIG_TRUSTED_KEYS is not set # CONFIG_USER_DECRYPTED_DATA is not set @@ -2327,7 +2345,16 @@ CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION=y CONFIG_ARCH_SELECTS_KEXEC_FILE=y CONFIG_BLK_DEV_RNBD=y CONFIG_CRASH_RESERVE=y +CONFIG_CRYPTO_CHACHA20_X86_64=m CONFIG_CRYPTO_LIB_AESCFB=m +CONFIG_CRYPTO_LIB_CHACHA=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m +CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m +CONFIG_CRYPTO_LIB_CHACHA_INTERNAL=m +CONFIG_CRYPTO_LIB_POLY1305=m +CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m +CONFIG_CRYPTO_LIB_POLY1305_INTERNAL=m +CONFIG_CRYPTO_POLY1305_X86_64=m CONFIG_CXL_PORT=y CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS=y CONFIG_INFINIBAND_RTRS=m diff --git a/debian/config/amd64/config.mobile b/debian/config/amd64/config.mobile index 43741c1..d35a050 100644 --- a/debian/config/amd64/config.mobile +++ b/debian/config/amd64/config.mobile @@ -108,8 +108,6 @@ CONFIG_SYSTEM_BLACKLIST_AUTH_UPDATE=y ## file: crypto/Kconfig ## CONFIG_CRYPTO_ECDH=y -CONFIG_CRYPTO_CTS=y -CONFIG_CRYPTO_XTS=y CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_842=y CONFIG_CRYPTO_LZ4=y @@ -557,6 +555,7 @@ CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION=y ## CONFIG_CXL_BUS=m CONFIG_CXL_MEM_RAW_COMMANDS=y +CONFIG_CXL_FEATURES=y ## ## file: drivers/devfreq/Kconfig @@ -639,6 +638,9 @@ CONFIG_EDAC_LEGACY_SYSFS=y # CONFIG_EDAC_DEBUG is not set CONFIG_EDAC_DECODE_MCE=y CONFIG_EDAC_GHES=y +# CONFIG_EDAC_SCRUB is not set +# CONFIG_EDAC_ECS is not set +# CONFIG_EDAC_MEM_REPAIR is not set CONFIG_EDAC_AMD64=m CONFIG_EDAC_E752X=m CONFIG_EDAC_I82975X=m @@ -723,6 +725,13 @@ CONFIG_FSI_SBEFIFO=m CONFIG_FSI_OCC=m CONFIG_I2CR_SCOM=m +## +## file: drivers/fwctl/Kconfig +## +CONFIG_FWCTL=m +CONFIG_FWCTL_MLX5=m +CONFIG_FWCTL_PDS=m + ## ## file: drivers/gnss/Kconfig ## @@ -899,6 +908,7 @@ CONFIG_DRM_AST=m CONFIG_DRM_CHIPONE_ICN6211=m CONFIG_DRM_CHRONTEL_CH7033=m CONFIG_DRM_DISPLAY_CONNECTOR=m +CONFIG_DRM_I2C_NXP_TDA998X=m CONFIG_DRM_ITE_IT6263=m CONFIG_DRM_ITE_IT6505=m CONFIG_DRM_LONTIUM_LT8912B=m @@ -988,14 +998,6 @@ CONFIG_DRM_GUD=m ## CONFIG_DRM_HISI_HIBMC=m -## -## file: drivers/gpu/drm/i2c/Kconfig -## -CONFIG_DRM_I2C_CH7006=m -CONFIG_DRM_I2C_SIL164=m -CONFIG_DRM_I2C_NXP_TDA998X=m -CONFIG_DRM_I2C_NXP_TDA9950=m - ## ## file: drivers/gpu/drm/i915/Kconfig ## @@ -1058,6 +1060,8 @@ CONFIG_NOUVEAU_DEBUG_DEFAULT=3 CONFIG_DRM_NOUVEAU_BACKLIGHT=y # CONFIG_DRM_NOUVEAU_SVM is not set # CONFIG_DRM_NOUVEAU_GSP_DEFAULT is not set +CONFIG_DRM_NOUVEAU_CH7006=m +CONFIG_DRM_NOUVEAU_SIL164=m ## ## file: drivers/gpu/drm/panel/Kconfig @@ -1119,6 +1123,7 @@ CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS=m CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00=m # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set CONFIG_DRM_PANEL_RAYDIUM_RM67191=m +CONFIG_DRM_PANEL_RAYDIUM_RM67200=m CONFIG_DRM_PANEL_RAYDIUM_RM68200=m CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m CONFIG_DRM_PANEL_RAYDIUM_RM69380=m @@ -1156,6 +1161,7 @@ CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521=m CONFIG_DRM_PANEL_STARTEK_KD070FHFID015=m CONFIG_DRM_PANEL_EDP=m CONFIG_DRM_PANEL_SIMPLE=m +CONFIG_DRM_PANEL_SUMMIT=m CONFIG_DRM_PANEL_SYNAPTICS_R63353=m CONFIG_DRM_PANEL_TDO_TL070WSH30=m CONFIG_DRM_PANEL_TPO_TD028TTEC1=m @@ -1164,6 +1170,7 @@ CONFIG_DRM_PANEL_TPO_TPG110=m # 
CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set CONFIG_DRM_PANEL_VISIONOX_R66451=m # CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set +CONFIG_DRM_PANEL_VISIONOX_RM692E5=m CONFIG_DRM_PANEL_VISIONOX_VTDR6130=m CONFIG_DRM_PANEL_WIDECHIPS_WS2401=m CONFIG_DRM_PANEL_XINPENG_XPP055C272=m @@ -1189,6 +1196,7 @@ CONFIG_DRM_SSD130X_SPI=m ## ## file: drivers/gpu/drm/tiny/Kconfig ## +CONFIG_DRM_APPLETBDRM=m CONFIG_DRM_ARCPGU=m CONFIG_DRM_BOCHS=m CONFIG_DRM_CIRRUS_QEMU=m @@ -1238,6 +1246,8 @@ CONFIG_DRM_VMWGFX=m ## CONFIG_DRM_XE=m CONFIG_DRM_XE_DISPLAY=y +CONFIG_DRM_XE_DP_TUNNEL=y +CONFIG_DRM_XE_DEVMEM_MIRROR=y CONFIG_DRM_XE_FORCE_PROBE="" ## @@ -1296,6 +1306,8 @@ CONFIG_HID_ACRUX=m CONFIG_HID_ACRUX_FF=y CONFIG_HID_APPLE=m CONFIG_HID_APPLEIR=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m CONFIG_HID_ASUS=m CONFIG_HID_AUREAL=m CONFIG_HID_BELKIN=m @@ -1481,6 +1493,11 @@ CONFIG_HSI_CHAR=m ## CONFIG_HTE=y +## +## file: drivers/hv/Kconfig +## +CONFIG_MSHV_ROOT=m + ## ## file: drivers/hwmon/Kconfig ## @@ -1515,6 +1532,7 @@ CONFIG_SENSORS_FAM15H_POWER=m CONFIG_SENSORS_APPLESMC=m CONFIG_SENSORS_ASB100=m CONFIG_SENSORS_ATXP1=m +CONFIG_SENSORS_CGBC=m CONFIG_SENSORS_CHIPCAP2=m CONFIG_SENSORS_CORSAIR_CPRO=m CONFIG_SENSORS_CORSAIR_PSU=m @@ -1541,6 +1559,7 @@ CONFIG_SENSORS_G762=m CONFIG_SENSORS_GPIO_FAN=m CONFIG_SENSORS_HIH6130=m CONFIG_SENSORS_HS3001=m +CONFIG_SENSORS_HTU31=m CONFIG_SENSORS_IBMAEM=m CONFIG_SENSORS_IBMPEX=m CONFIG_SENSORS_I5500=m @@ -1713,6 +1732,7 @@ CONFIG_SENSORS_DELTA_AHE50DC_FAN=m CONFIG_SENSORS_FSP_3Y=m CONFIG_SENSORS_IBM_CFFPS=m CONFIG_SENSORS_DPS920AB=m +CONFIG_SENSORS_INA233=m CONFIG_SENSORS_INSPUR_IPSPS=m CONFIG_SENSORS_IR35221=m CONFIG_SENSORS_IR36021=m @@ -2043,7 +2063,6 @@ CONFIG_INPUT_TWL4030_VIBRA=m CONFIG_INPUT_TWL6040_VIBRA=m CONFIG_INPUT_UINPUT=m CONFIG_INPUT_PALMAS_PWRBUTTON=m -CONFIG_INPUT_PCF50633_PMU=m CONFIG_INPUT_PCF8574=m CONFIG_INPUT_PWM_BEEPER=m CONFIG_INPUT_PWM_VIBRA=m @@ -2355,6 +2374,7 @@ CONFIG_LEDS_MC13783=m CONFIG_LEDS_TCA6507=m CONFIG_LEDS_TLC591XX=m CONFIG_LEDS_MAX77650=m +CONFIG_LEDS_MAX77705=m CONFIG_LEDS_MAX8997=m CONFIG_LEDS_LM355x=m CONFIG_LEDS_MENF21BMC=m @@ -2406,7 +2426,7 @@ CONFIG_LEDS_QCOM_LPG=m CONFIG_LEDS_MT6370_RGB=m ## -## file: drivers/leds/simple/Kconfig +## file: drivers/leds/simatic/Kconfig ## CONFIG_LEDS_SIEMENS_SIMATIC_IPC=m CONFIG_LEDS_SIEMENS_SIMATIC_IPC_APOLLOLAKE=m @@ -2496,6 +2516,7 @@ CONFIG_MEDIA_CEC_SUPPORT=y ## file: drivers/media/cec/i2c/Kconfig ## CONFIG_CEC_CH7322=m +CONFIG_CEC_NXP_TDA9950=m ## ## file: drivers/media/cec/platform/Kconfig @@ -2558,6 +2579,7 @@ CONFIG_VIDEO_MSP3400=m # CONFIG_VIDEO_BT856 is not set # CONFIG_VIDEO_BT866 is not set # CONFIG_VIDEO_ISL7998X is not set +CONFIG_VIDEO_LT6911UXE=m # CONFIG_VIDEO_KS0127 is not set # CONFIG_VIDEO_MAX9286 is not set # CONFIG_VIDEO_ML86V7667 is not set @@ -2883,6 +2905,7 @@ CONFIG_MFD_MAX77620=y CONFIG_MFD_MAX77650=m CONFIG_MFD_MAX77686=m CONFIG_MFD_MAX77693=m +CONFIG_MFD_MAX77705=m CONFIG_MFD_MAX77714=m CONFIG_MFD_MAX77843=y CONFIG_MFD_MAX8907=m @@ -2899,9 +2922,6 @@ CONFIG_MFD_CPCAP=m CONFIG_MFD_VIPERBOARD=m CONFIG_MFD_NTXEC=m CONFIG_MFD_RETU=m -CONFIG_MFD_PCF50633=m -CONFIG_PCF50633_ADC=m -CONFIG_PCF50633_GPIO=m CONFIG_MFD_SY7636A=m CONFIG_MFD_RDC321X=m CONFIG_MFD_RT4831=m @@ -3933,6 +3953,7 @@ CONFIG_8139TOO_TUNE_TWISTER=y CONFIG_8139TOO_8129=y # CONFIG_8139_OLD_RX_RESET is not set CONFIG_R8169=m +CONFIG_R8169_LEDS=y CONFIG_RTASE=m ## @@ -4120,6 +4141,7 @@ CONFIG_IEEE802154_HWSIM=m CONFIG_MCTP_SERIAL=m CONFIG_MCTP_TRANSPORT_I2C=m CONFIG_MCTP_TRANSPORT_I3C=m 
+CONFIG_MCTP_TRANSPORT_USB=m ## ## file: drivers/net/mdio/Kconfig @@ -4220,6 +4242,7 @@ CONFIG_QCA807X_PHY=m ## file: drivers/net/phy/realtek/Kconfig ## CONFIG_REALTEK_PHY=m +CONFIG_REALTEK_PHY_HWMON=y ## ## file: drivers/net/plip/Kconfig @@ -4546,6 +4569,7 @@ CONFIG_IWLEGACY_DEBUGFS=y CONFIG_IWLWIFI=m CONFIG_IWLDVM=m CONFIG_IWLMVM=m +CONFIG_IWLMLD=m # CONFIG_IWLWIFI_DEBUG is not set CONFIG_IWLWIFI_DEBUGFS=y # CONFIG_IWLWIFI_DEVICE_TRACING is not set @@ -4765,6 +4789,8 @@ CONFIG_RTW88_8821CS=m CONFIG_RTW88_8821CU=m CONFIG_RTW88_8821AU=m CONFIG_RTW88_8812AU=m +CONFIG_RTW88_8814AE=m +CONFIG_RTW88_8814AU=m CONFIG_RTW88_DEBUG=y CONFIG_RTW88_DEBUGFS=y @@ -5045,6 +5071,7 @@ CONFIG_PCIE_CADENCE_PLAT_HOST=y ## ## file: drivers/pci/controller/dwc/Kconfig ## +# CONFIG_PCIE_DW_DEBUGFS is not set CONFIG_PCI_MESON=y CONFIG_PCIE_INTEL_GW=y CONFIG_PCIE_DW_PLAT_HOST=y @@ -5073,6 +5100,11 @@ CONFIG_PCIE_ECRC=y CONFIG_PCIEASPM_POWERSAVE=y ## end choice +## +## file: drivers/pci/pwrctrl/Kconfig +## +CONFIG_PCI_PWRCTL_SLOT=m + ## ## file: drivers/pci/switch/Kconfig ## @@ -5163,6 +5195,7 @@ CONFIG_PINMUX=y CONFIG_PINCONF=y # CONFIG_DEBUG_PINCTRL is not set CONFIG_PINCTRL_AMD=y +CONFIG_PINCTRL_AMDISP=m CONFIG_PINCTRL_AS3722=m CONFIG_PINCTRL_AXP209=m CONFIG_PINCTRL_AW9523=m @@ -5249,6 +5282,7 @@ CONFIG_GPD_POCKET_FAN=m CONFIG_WIRELESS_HOTKEY=m CONFIG_IBM_RTL=m CONFIG_IDEAPAD_LAPTOP=m +CONFIG_LENOVO_WMI_HOTKEY_UTILITIES=m CONFIG_LENOVO_YMC=m CONFIG_SENSORS_HDAPS=m CONFIG_THINKPAD_ACPI=m @@ -5267,6 +5301,7 @@ CONFIG_MSI_WMI=m CONFIG_MSI_WMI_PLATFORM=m CONFIG_PCENGINES_APU2=m CONFIG_BARCO_P50_GPIO=m +CONFIG_SAMSUNG_GALAXYBOOK=m CONFIG_SAMSUNG_LAPTOP=m CONFIG_SAMSUNG_Q10=m CONFIG_TOSHIBA_BT_RFKILL=m @@ -5281,7 +5316,6 @@ CONFIG_SONYPI_COMPAT=y CONFIG_SYSTEM76_ACPI=m CONFIG_TOPSTAR_LAPTOP=m CONFIG_SERIAL_MULTI_INSTANTIATE=m -CONFIG_MLX_PLATFORM=m CONFIG_INSPUR_PLATFORM_PROFILE=m CONFIG_LENOVO_WMI_CAMERA=m CONFIG_INTEL_IPS=m @@ -5321,6 +5355,8 @@ CONFIG_AMD_PMF=m ## CONFIG_X86_PLATFORM_DRIVERS_DELL=y CONFIG_ALIENWARE_WMI=m +CONFIG_ALIENWARE_WMI_LEGACY=y +CONFIG_ALIENWARE_WMI_WMAX=y CONFIG_DCDBAS=m CONFIG_DELL_LAPTOP=m CONFIG_DELL_RBU=m @@ -5498,7 +5534,6 @@ CONFIG_BATTERY_MAX17042=m CONFIG_BATTERY_MAX1720X=m CONFIG_BATTERY_MAX1721X=m CONFIG_CHARGER_88PM860X=m -CONFIG_CHARGER_PCF50633=m CONFIG_CHARGER_ISP1704=m CONFIG_CHARGER_MAX8903=m CONFIG_CHARGER_LP8727=m @@ -5510,6 +5545,7 @@ CONFIG_CHARGER_MAX14577=m CONFIG_CHARGER_DETECTOR_MAX14656=m CONFIG_CHARGER_MAX77650=m CONFIG_CHARGER_MAX77693=m +CONFIG_CHARGER_MAX77705=m CONFIG_CHARGER_MAX77976=m CONFIG_CHARGER_MAX8997=m CONFIG_CHARGER_MAX8998=m @@ -5556,6 +5592,11 @@ CONFIG_PPS=y ## CONFIG_PPS_CLIENT_PARPORT=m +## +## file: drivers/pps/generators/Kconfig +## +CONFIG_PPS_GENERATOR_TIO=m + ## ## file: drivers/ptp/Kconfig ## @@ -5727,8 +5768,8 @@ CONFIG_REGULATOR_MT6370=m CONFIG_REGULATOR_MT6397=m CONFIG_REGULATOR_PALMAS=m CONFIG_REGULATOR_PCA9450=m +CONFIG_REGULATOR_PF9453=m CONFIG_REGULATOR_PCAP=m -CONFIG_REGULATOR_PCF50633=m CONFIG_REGULATOR_PF8X00=m CONFIG_REGULATOR_PFUZE100=m CONFIG_REGULATOR_PV88060=m @@ -5916,7 +5957,6 @@ CONFIG_RTC_DRV_MSM6242=m CONFIG_RTC_DRV_RP5C01=m CONFIG_RTC_DRV_WM831X=m CONFIG_RTC_DRV_WM8350=m -CONFIG_RTC_DRV_PCF50633=m CONFIG_RTC_DRV_ZYNQMP=m CONFIG_RTC_DRV_NTXEC=m CONFIG_RTC_DRV_CADENCE=m @@ -6041,6 +6081,7 @@ CONFIG_SPI_TLE62X0=m CONFIG_SPI_SLAVE=y CONFIG_SPI_SLAVE_TIME=m CONFIG_SPI_SLAVE_SYSTEM_CONTROL=m +CONFIG_SPI_OFFLOAD_TRIGGER_PWM=m ## ## file: drivers/spmi/Kconfig @@ -6258,6 +6299,7 @@ CONFIG_SERIAL_8250_PCI1XXXX=m 
CONFIG_SERIAL_8250_RSA=y CONFIG_SERIAL_8250_RT288X=y CONFIG_SERIAL_8250_MID=y +CONFIG_SERIAL_8250_NI=m CONFIG_SERIAL_OF_PLATFORM=m ## @@ -6707,6 +6749,7 @@ CONFIG_TYPEC_MUX_PI3USB30532=m CONFIG_TYPEC_MUX_INTEL_PMC=m CONFIG_TYPEC_MUX_IT5205=m CONFIG_TYPEC_MUX_NB7VPQ904M=m +CONFIG_TYPEC_MUX_PS883X=m CONFIG_TYPEC_MUX_PTN36502=m CONFIG_TYPEC_MUX_TUSB1046=m CONFIG_TYPEC_MUX_WCD939X_USBSS=m @@ -6819,7 +6862,6 @@ CONFIG_BACKLIGHT_ADP5520=m CONFIG_BACKLIGHT_ADP8860=m CONFIG_BACKLIGHT_ADP8870=m CONFIG_BACKLIGHT_88PM860X=m -CONFIG_BACKLIGHT_PCF50633=m CONFIG_BACKLIGHT_AAT2870=m CONFIG_BACKLIGHT_LM3509=m CONFIG_BACKLIGHT_LM3630A=m @@ -6913,6 +6955,7 @@ CONFIG_DA9063_WATCHDOG=m CONFIG_DA9062_WATCHDOG=m CONFIG_GPIO_WATCHDOG=m CONFIG_LENOVO_SE10_WDT=m +CONFIG_LENOVO_SE30_WDT=m CONFIG_MENF21BMC_WATCHDOG=m CONFIG_WM831X_WATCHDOG=m CONFIG_WM8350_WATCHDOG=m @@ -7242,11 +7285,6 @@ CONFIG_CIFS_SWN_UPCALL=y CONFIG_CIFS_FSCACHE=y # CONFIG_CIFS_COMPRESSION is not set -## -## file: fs/sysv/Kconfig -## -CONFIG_SYSV_FS=m - ## ## file: fs/ubifs/Kconfig ## @@ -7389,8 +7427,6 @@ CONFIG_TEST_DHRY=m # CONFIG_ASYNC_RAID6_TEST is not set # CONFIG_TEST_HEXDUMP is not set # CONFIG_TEST_KSTRTOX is not set -# CONFIG_TEST_PRINTF is not set -# CONFIG_TEST_SCANF is not set # CONFIG_TEST_BITMAP is not set # CONFIG_TEST_UUID is not set # CONFIG_TEST_XARRAY is not set @@ -7402,7 +7438,6 @@ CONFIG_TEST_DHRY=m # CONFIG_TEST_BITOPS is not set # CONFIG_TEST_VMALLOC is not set CONFIG_TEST_BPF=m -CONFIG_TEST_BLACKHOLE_DEV=m # CONFIG_FIND_BIT_BENCHMARK is not set CONFIG_TEST_FIRMWARE=m # CONFIG_TEST_SYSCTL is not set @@ -7446,12 +7481,9 @@ CONFIG_ZSWAP_SHRINKER_DEFAULT_ON=y CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD=y ## end choice ## choice: Default allocator -CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y -# CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD_DEPRECATED is not set -# CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set +CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC=y ## end choice -CONFIG_ZBUD=y -CONFIG_Z3FOLD_DEPRECATED=m +CONFIG_ZSMALLOC=y CONFIG_HWPOISON_INJECT=m CONFIG_NUMA_EMU=y @@ -7873,6 +7905,7 @@ CONFIG_IPE_POLICY_SIG_PLATFORM_KEYRING=y ## file: security/keys/Kconfig ## CONFIG_KEYS_REQUEST_CACHE=y +CONFIG_BIG_KEYS=y CONFIG_TRUSTED_KEYS=m CONFIG_USER_DECRYPTED_DATA=y @@ -8146,6 +8179,7 @@ CONFIG_SND_SOC_AK5558=m CONFIG_SND_SOC_ALC5623=m CONFIG_SND_SOC_AW8738=m CONFIG_SND_SOC_AW88395=m +CONFIG_SND_SOC_AW88166=m CONFIG_SND_SOC_AW88261=m CONFIG_SND_SOC_AW88081=m CONFIG_SND_SOC_AW87390=m @@ -8430,6 +8464,7 @@ CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98927=m CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98357A=m CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98373=m CONFIG_SND_SOC_INTEL_AVS_MACH_NAU8825=m +CONFIG_SND_SOC_INTEL_AVS_MACH_PCM3168A=m CONFIG_SND_SOC_INTEL_AVS_MACH_PROBE=m CONFIG_SND_SOC_INTEL_AVS_MACH_RT274=m CONFIG_SND_SOC_INTEL_AVS_MACH_RT286=m @@ -8648,14 +8683,28 @@ CONFIG_CHECK_SIGNATURE=y CONFIG_CHELSIO_LIB=m CONFIG_CLOSURES=y CONFIG_COMPAT_NETLINK_MESSAGES=y +CONFIG_CRC4=m +CONFIG_CRC7=m +CONFIG_CRC8=m +CONFIG_CRC_CCITT=m +CONFIG_CRYPTO_CHACHA20_X86_64=y CONFIG_CRYPTO_DEV_ATMEL_I2C=m CONFIG_CRYPTO_DEV_NITROX=m CONFIG_CRYPTO_DEV_QAT=m CONFIG_CRYPTO_LIB_AESCFB=y CONFIG_CRYPTO_LIB_ARC4=m +CONFIG_CRYPTO_LIB_CHACHA=y +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=y +CONFIG_CRYPTO_LIB_CHACHA_GENERIC=y +CONFIG_CRYPTO_LIB_CHACHA_INTERNAL=y +CONFIG_CRYPTO_LIB_POLY1305=y +CONFIG_CRYPTO_LIB_POLY1305_GENERIC=y +CONFIG_CRYPTO_LIB_POLY1305_INTERNAL=y +CONFIG_CRYPTO_POLY1305_X86_64=y CONFIG_CXL_PORT=m CONFIG_DCA=m CONFIG_DELL_WMI_DESCRIPTOR=m +CONFIG_DEV_SYNC_PROBE=m CONFIG_DMA_DECLARE_COHERENT=y 
CONFIG_DMA_ENGINE_RAID=y CONFIG_DMA_OF=y @@ -8685,6 +8734,7 @@ CONFIG_DRM_DISPLAY_HELPER=m CONFIG_DRM_EXEC=m CONFIG_DRM_GEM_DMA_HELPER=m CONFIG_DRM_GEM_SHMEM_HELPER=m +CONFIG_DRM_GPUSVM=m CONFIG_DRM_GPUVM=m CONFIG_DRM_I915_GVT=y CONFIG_DRM_KMS_HELPER=m @@ -8871,6 +8921,7 @@ CONFIG_PCIE_DW_PLAT=y CONFIG_PCIE_PLDA_HOST=y CONFIG_PCI_ECAM=y CONFIG_PCI_HOST_COMMON=y +CONFIG_PCI_PWRCTL=m CONFIG_PCS_LYNX=m CONFIG_PHYLIB_LEDS=y CONFIG_PINCTRL_CS47L15=y @@ -8894,7 +8945,6 @@ CONFIG_PPPOE_HASH_BITS=4 CONFIG_PREEMPTION=y CONFIG_PREEMPT_BUILD=y CONFIG_PREEMPT_COUNT=y -CONFIG_PREEMPT_RCU=y CONFIG_PWM_DWC_CORE=m CONFIG_PWM_LPSS=m CONFIG_QCA7000=m @@ -8903,10 +8953,8 @@ CONFIG_QCOM_PDR_HELPERS=m CONFIG_QCOM_PDR_MSG=m CONFIG_QCOM_QMI_HELPERS=m CONFIG_QTNFMAC=m -CONFIG_R8169_LEDS=y CONFIG_RAID6_PQ=m CONFIG_RATIONAL=y -CONFIG_REALTEK_PHY_HWMON=y CONFIG_REBOOT_MODE=m CONFIG_REED_SOLOMON_DEC16=y CONFIG_REGMAP=y @@ -8951,6 +8999,7 @@ CONFIG_RTW88_8703B=m CONFIG_RTW88_8723D=m CONFIG_RTW88_8723X=m CONFIG_RTW88_8812A=m +CONFIG_RTW88_8814A=m CONFIG_RTW88_8821A=m CONFIG_RTW88_8821C=m CONFIG_RTW88_8822B=m @@ -9038,6 +9087,7 @@ CONFIG_SND_SOC_ADAU1761=m CONFIG_SND_SOC_ADAU17X1=m CONFIG_SND_SOC_ADAU7118=m CONFIG_SND_SOC_ADAU_UTILS=m +CONFIG_SND_SOC_AMD_ACPI_MACH=m CONFIG_SND_SOC_AMD_ACP_I2S=m CONFIG_SND_SOC_AMD_ACP_LEGACY_COMMON=m CONFIG_SND_SOC_AMD_ACP_PCM=m @@ -9161,6 +9211,7 @@ CONFIG_SND_SOC_WM5102=m CONFIG_SND_SOC_WM8731=m CONFIG_SND_SOC_WM8804=m CONFIG_SND_SOC_WM_ADSP=m +CONFIG_SND_SOF_SOF_HDA_SDW_BPT=m CONFIG_SND_SST_ATOM_HIFI2_PLATFORM=m CONFIG_SND_SYNTH_EMUX=m CONFIG_SND_TIMER=m @@ -9176,6 +9227,7 @@ CONFIG_SOUND_OSS_CORE=y CONFIG_SPI_DYNAMIC=y CONFIG_SPI_FSL_LIB=m CONFIG_SPI_MASTER=y +CONFIG_SPI_OFFLOAD=y CONFIG_SPI_PXA2XX_PCI=m CONFIG_SSB_B43_PCI_BRIDGE=y CONFIG_SSB_BLOCKIO=y @@ -9258,8 +9310,7 @@ CONFIG_XEN_FRONT_PGDIR_SHBUF=m CONFIG_XEN_GRANT_DMA_IOMMU=y CONFIG_XEN_PV_DOM0=y CONFIG_XILLYBUS_CLASS=m -CONFIG_Z3FOLD=m CONFIG_ZPOOL=y CONFIG_ZSTD_COMPRESS=y CONFIG_ZSWAP_COMPRESSOR_DEFAULT="zstd" -CONFIG_ZSWAP_ZPOOL_DEFAULT="zbud" +CONFIG_ZSWAP_ZPOOL_DEFAULT="zsmalloc" diff --git a/debian/config/amd64/config.vm b/debian/config/amd64/config.vm index 0f90eac..d47f31c 100644 --- a/debian/config/amd64/config.vm +++ b/debian/config/amd64/config.vm @@ -11,8 +11,8 @@ CONFIG_X86_EXTENDED_PLATFORM=y # CONFIG_X86_NUMACHIP is not set # CONFIG_X86_VSMP is not set -CONFIG_X86_GOLDFISH=y # CONFIG_X86_INTEL_MID is not set +CONFIG_X86_GOLDFISH=y # CONFIG_X86_INTEL_LPSS is not set # CONFIG_X86_AMD_PLATFORM_DEVICE is not set CONFIG_IOSF_MBI=m @@ -97,8 +97,6 @@ CONFIG_IO_DELAY_NONE=y ## file: crypto/Kconfig ## CONFIG_CRYPTO_ECDH=m -CONFIG_CRYPTO_CTS=m -CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m @@ -352,6 +350,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y ## CONFIG_CXL_BUS=y # CONFIG_CXL_MEM_RAW_COMMANDS is not set +# CONFIG_CXL_FEATURES is not set ## ## file: drivers/devfreq/Kconfig @@ -427,6 +426,9 @@ CONFIG_EDAC_LEGACY_SYSFS=y # CONFIG_EDAC_DEBUG is not set CONFIG_EDAC_DECODE_MCE=y CONFIG_EDAC_GHES=y +# CONFIG_EDAC_SCRUB is not set +# CONFIG_EDAC_ECS is not set +# CONFIG_EDAC_MEM_REPAIR is not set # CONFIG_EDAC_AMD64 is not set # CONFIG_EDAC_E752X is not set # CONFIG_EDAC_I82975X is not set @@ -484,6 +486,11 @@ CONFIG_GOOGLE_COREBOOT_TABLE=m CONFIG_GOOGLE_FRAMEBUFFER_COREBOOT=m CONFIG_GOOGLE_VPD=m +## +## file: drivers/fwctl/Kconfig +## +# CONFIG_FWCTL is not set + ## ## file: drivers/gnss/Kconfig ## @@ -562,6 +569,11 @@ CONFIG_DRM_HYPERV=m ## # CONFIG_DRM_AST is not set +## +## file: 
drivers/gpu/drm/bridge/Kconfig +## +# CONFIG_DRM_I2C_NXP_TDA998X is not set + ## ## file: drivers/gpu/drm/bridge/analogix/Kconfig ## @@ -598,14 +610,6 @@ CONFIG_DRM_CLIENT_DEFAULT_FBDEV=y ## # CONFIG_DRM_HISI_HIBMC is not set -## -## file: drivers/gpu/drm/i2c/Kconfig -## -# CONFIG_DRM_I2C_CH7006 is not set -# CONFIG_DRM_I2C_SIL164 is not set -# CONFIG_DRM_I2C_NXP_TDA998X is not set -# CONFIG_DRM_I2C_NXP_TDA9950 is not set - ## ## file: drivers/gpu/drm/i915/Kconfig ## @@ -639,6 +643,7 @@ CONFIG_DRM_QXL=m ## ## file: drivers/gpu/drm/tiny/Kconfig ## +# CONFIG_DRM_APPLETBDRM is not set CONFIG_DRM_BOCHS=m CONFIG_DRM_CIRRUS_QEMU=m # CONFIG_DRM_GM12U320 is not set @@ -843,6 +848,11 @@ CONFIG_HSI_CHAR=m ## # CONFIG_HTE is not set +## +## file: drivers/hv/Kconfig +## +CONFIG_MSHV_ROOT=m + ## ## file: drivers/hwmon/Kconfig ## @@ -894,6 +904,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_G762 is not set # CONFIG_SENSORS_HIH6130 is not set # CONFIG_SENSORS_HS3001 is not set +# CONFIG_SENSORS_HTU31 is not set # CONFIG_SENSORS_I5500 is not set # CONFIG_SENSORS_CORETEMP is not set # CONFIG_SENSORS_ISL28022 is not set @@ -1039,6 +1050,7 @@ CONFIG_SENSORS_PMBUS=m # CONFIG_SENSORS_DELTA_AHE50DC_FAN is not set # CONFIG_SENSORS_FSP_3Y is not set # CONFIG_SENSORS_DPS920AB is not set +# CONFIG_SENSORS_INA233 is not set # CONFIG_SENSORS_INSPUR_IPSPS is not set # CONFIG_SENSORS_IR35221 is not set # CONFIG_SENSORS_IR36021 is not set @@ -1631,6 +1643,7 @@ CONFIG_MFD_INTEL_PMC_BXT=m # CONFIG_MFD_MAX14577 is not set # CONFIG_MFD_MAX77541 is not set # CONFIG_MFD_MAX77693 is not set +# CONFIG_MFD_MAX77705 is not set # CONFIG_MFD_MAX77843 is not set # CONFIG_MFD_MAX8907 is not set # CONFIG_MFD_MAX8925 is not set @@ -1642,7 +1655,6 @@ CONFIG_MFD_INTEL_PMC_BXT=m # CONFIG_MFD_MENF21BMC is not set # CONFIG_MFD_VIPERBOARD is not set # CONFIG_MFD_RETU is not set -# CONFIG_MFD_PCF50633 is not set # CONFIG_MFD_SY7636A is not set # CONFIG_MFD_RDC321X is not set # CONFIG_MFD_RT4831 is not set @@ -2169,6 +2181,7 @@ CONFIG_FBNIC=m ## CONFIG_MCTP_SERIAL=m CONFIG_MCTP_TRANSPORT_I2C=m +CONFIG_MCTP_TRANSPORT_USB=m ## ## file: drivers/net/mdio/Kconfig @@ -2376,6 +2389,11 @@ CONFIG_PCIEASPM_DEFAULT=y # CONFIG_PCIEASPM_POWERSAVE is not set ## end choice +## +## file: drivers/pci/pwrctrl/Kconfig +## +# CONFIG_PCI_PWRCTL_SLOT is not set + ## ## file: drivers/pci/switch/Kconfig ## @@ -2446,6 +2464,7 @@ CONFIG_WMI_BMOF=m # CONFIG_GPD_POCKET_FAN is not set # CONFIG_WIRELESS_HOTKEY is not set # CONFIG_IBM_RTL is not set +# CONFIG_LENOVO_WMI_HOTKEY_UTILITIES is not set # CONFIG_SENSORS_HDAPS is not set # CONFIG_THINKPAD_LMI is not set CONFIG_ACPI_QUICKSTART=m @@ -2459,7 +2478,6 @@ CONFIG_MSI_WMI_PLATFORM=m # CONFIG_ACPI_CMPC is not set # CONFIG_TOPSTAR_LAPTOP is not set CONFIG_SERIAL_MULTI_INSTANTIATE=m -# CONFIG_MLX_PLATFORM is not set # CONFIG_INSPUR_PLATFORM_PROFILE is not set # CONFIG_LENOVO_WMI_CAMERA is not set # CONFIG_INTEL_IPS is not set @@ -2594,7 +2612,6 @@ CONFIG_CHARGER_GPIO=m CONFIG_BATTERY_GOLDFISH=m # CONFIG_BATTERY_RT5033 is not set # CONFIG_CHARGER_RT9455 is not set -# CONFIG_FUEL_GAUGE_STC3117 is not set # CONFIG_CHARGER_BD99954 is not set # CONFIG_BATTERY_UG3105 is not set # CONFIG_FUEL_GAUGE_MM8013 is not set @@ -2609,6 +2626,11 @@ CONFIG_BATTERY_GOLDFISH=m ## CONFIG_PPS=m +## +## file: drivers/pps/generators/Kconfig +## +CONFIG_PPS_GENERATOR_TIO=m + ## ## file: drivers/ptp/Kconfig ## @@ -2864,6 +2886,7 @@ CONFIG_GOLDFISH_TTY=m # CONFIG_SERIAL_8250_RSA is not set # CONFIG_SERIAL_8250_RT288X is not set # CONFIG_SERIAL_8250_MID is not 
set +# CONFIG_SERIAL_8250_NI is not set ## ## file: drivers/ufs/Kconfig @@ -3158,6 +3181,7 @@ CONFIG_WATCHDOG_PRETIMEOUT_GOV_NOOP=m CONFIG_WATCHDOG_PRETIMEOUT_GOV_PANIC=m CONFIG_SOFT_WATCHDOG_PRETIMEOUT=y # CONFIG_LENOVO_SE10_WDT is not set +# CONFIG_LENOVO_SE30_WDT is not set # CONFIG_XILINX_WATCHDOG is not set # CONFIG_ZIIRAVE_WATCHDOG is not set # CONFIG_CADENCE_WATCHDOG is not set @@ -3422,11 +3446,6 @@ CONFIG_CIFS_SWN_UPCALL=y CONFIG_CIFS_FSCACHE=y # CONFIG_CIFS_COMPRESSION is not set -## -## file: fs/sysv/Kconfig -## -# CONFIG_SYSV_FS is not set - ## ## file: fs/ufs/Kconfig ## @@ -3560,12 +3579,9 @@ CONFIG_ZSWAP_SHRINKER_DEFAULT_ON=y CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD=y ## end choice ## choice: Default allocator -CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y -# CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD_DEPRECATED is not set -# CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set +CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC=y ## end choice -CONFIG_ZBUD=y -CONFIG_Z3FOLD_DEPRECATED=m +CONFIG_ZSMALLOC=y # CONFIG_HWPOISON_INJECT is not set CONFIG_NUMA_EMU=y @@ -3880,6 +3896,7 @@ CONFIG_IPE_PROP_DM_VERITY_SIGNATURE=y ## file: security/keys/Kconfig ## # CONFIG_KEYS_REQUEST_CACHE is not set +# CONFIG_BIG_KEYS is not set # CONFIG_TRUSTED_KEYS is not set # CONFIG_USER_DECRYPTED_DATA is not set @@ -3898,9 +3915,20 @@ CONFIG_ASYNC_PQ=m CONFIG_ASYNC_RAID6_RECOV=m CONFIG_BLK_DEV_RNBD=y CONFIG_CHECK_SIGNATURE=y +CONFIG_CRC_CCITT=m +CONFIG_CRYPTO_CHACHA20_X86_64=m CONFIG_CRYPTO_LIB_AESCFB=m CONFIG_CRYPTO_LIB_ARC4=m +CONFIG_CRYPTO_LIB_CHACHA=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m +CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m +CONFIG_CRYPTO_LIB_CHACHA_INTERNAL=m +CONFIG_CRYPTO_LIB_POLY1305=m +CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m +CONFIG_CRYPTO_LIB_POLY1305_INTERNAL=m +CONFIG_CRYPTO_POLY1305_X86_64=m CONFIG_CXL_PORT=y +CONFIG_DEV_SYNC_PROBE=m CONFIG_DRM_BRIDGE=y CONFIG_DRM_CLIENT=y CONFIG_DRM_CLIENT_DEFAULT="fbdev" @@ -3953,7 +3981,6 @@ CONFIG_PPPOE_HASH_BITS=4 CONFIG_PREEMPTION=y CONFIG_PREEMPT_BUILD=y CONFIG_PREEMPT_COUNT=y -CONFIG_PREEMPT_RCU=y CONFIG_RAID6_PQ=m CONFIG_RATIONAL=y CONFIG_REGMAP=y @@ -3980,8 +4007,7 @@ CONFIG_USB_XHCI_PCI=m CONFIG_VIDEOMODE_HELPERS=y CONFIG_VIRTIO_DMA_SHARED_BUFFER=m CONFIG_XEN_FRONT_PGDIR_SHBUF=m -CONFIG_Z3FOLD=m CONFIG_ZPOOL=y CONFIG_ZSTD_COMPRESS=y CONFIG_ZSWAP_COMPRESSOR_DEFAULT="zstd" -CONFIG_ZSWAP_ZPOOL_DEFAULT="zbud" +CONFIG_ZSWAP_ZPOOL_DEFAULT="zsmalloc" diff --git a/debian/config/config b/debian/config/config index 26199a0..f0d296f 100644 --- a/debian/config/config +++ b/debian/config/config @@ -117,7 +117,6 @@ CONFIG_MITIGATION_PAGE_TABLE_ISOLATION=y # CONFIG_MITIGATION_SRBDS is not set # CONFIG_MITIGATION_SSB is not set CONFIG_PCI_MMCONFIG=y -# CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_ISA_BUS is not set CONFIG_ISA_DMA_API=y CONFIG_IA32_EMULATION=y @@ -127,7 +126,11 @@ CONFIG_IA32_EMULATION=y ## ## file: arch/x86/Kconfig.cpu ## -## choice: Processor family +## choice: x86_64 Compiler Build Optimization +# CONFIG_X86_NATIVE_CPU is not set +CONFIG_GENERIC_CPU=y +# CONFIG_MNATIVE_INTEL is not set +# CONFIG_MNATIVE_AMD is not set # CONFIG_MK8 is not set # CONFIG_MK8SSE3 is not set # CONFIG_MK10 is not set @@ -144,7 +147,6 @@ CONFIG_IA32_EMULATION=y # CONFIG_MZEN4 is not set # CONFIG_MZEN5 is not set # CONFIG_MPSC is not set -# CONFIG_MATOM is not set # CONFIG_MCORE2 is not set # CONFIG_MNEHALEM is not set # CONFIG_MWESTMERE is not set @@ -160,8 +162,8 @@ CONFIG_IA32_EMULATION=y # CONFIG_MCANNONLAKE is not set # CONFIG_MICELAKE_CLIENT is not set # CONFIG_MICELAKE_SERVER is not set -# 
CONFIG_MCASCADELAKE is not set # CONFIG_MCOOPERLAKE is not set +# CONFIG_MCASCADELAKE is not set # CONFIG_MTIGERLAKE is not set # CONFIG_MSAPPHIRERAPIDS is not set # CONFIG_MROCKETLAKE is not set @@ -169,9 +171,6 @@ CONFIG_IA32_EMULATION=y # CONFIG_MRAPTORLAKE is not set # CONFIG_MMETEORLAKE is not set # CONFIG_MEMERALDRAPIDS is not set -CONFIG_GENERIC_CPU=y -# CONFIG_MNATIVE_INTEL is not set -# CONFIG_MNATIVE_AMD is not set ## end choice # CONFIG_PROCESSOR_SELECT is not set CONFIG_CPU_SUP_INTEL=y @@ -332,6 +331,7 @@ CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_AUTHENC=m +CONFIG_CRYPTO_KRB5ENC=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_RSA=y CONFIG_CRYPTO_DH=y @@ -355,10 +355,12 @@ CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_CHACHA20=m CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_CTR=y +CONFIG_CRYPTO_CTS=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=y CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_CCM=m @@ -386,8 +388,6 @@ CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_XXHASH=m CONFIG_CRYPTO_CRC32C=y CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRCT10DIF=y -CONFIG_CRYPTO_CRC64_ROCKSOFT=y CONFIG_CRYPTO_LZO=y CONFIG_CRYPTO_ANSI_CPRNG=m CONFIG_CRYPTO_DRBG_MENU=y @@ -414,6 +414,12 @@ CONFIG_PKCS7_MESSAGE_PARSER=y CONFIG_SIGNED_PE_FILE_VERIFICATION=y # CONFIG_FIPS_SIGNATURE_SELFTEST is not set +## +## file: crypto/krb5/Kconfig +## +CONFIG_CRYPTO_KRB5=m +# CONFIG_CRYPTO_KRB5_SELFTESTS is not set + ## ## file: drivers/acpi/Kconfig ## @@ -663,6 +669,7 @@ CONFIG_X86_POWERNOW_K8=m CONFIG_X86_AMD_FREQ_SENSITIVITY=m CONFIG_X86_SPEEDSTEP_CENTRINO=m CONFIG_X86_P4_CLOCKMOD=m +CONFIG_CPUFREQ_ARCH_CUR_FREQ=y ## ## file: drivers/cpuidle/Kconfig @@ -1169,6 +1176,7 @@ CONFIG_PCI_REALLOC_ENABLE_AUTO=y CONFIG_PCI_STUB=m CONFIG_PCI_PF_STUB=m CONFIG_XEN_PCIDEV_FRONTEND=m +CONFIG_PCI_DOE=y CONFIG_PCI_IOV=y CONFIG_PCI_PRI=y CONFIG_PCI_PASID=y @@ -2079,7 +2087,6 @@ CONFIG_UDF_FS=m ## file: fs/unicode/Kconfig ## CONFIG_UNICODE=y -# CONFIG_UNICODE_NORMALIZATION_SELFTEST is not set ## ## file: fs/verity/Kconfig @@ -2161,7 +2168,6 @@ CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y CONFIG_CPUSETS=y # CONFIG_CPUSETS_V1 is not set -CONFIG_PROC_PID_CPUSET=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y @@ -2419,17 +2425,6 @@ CONFIG_SYNTH_EVENTS=y ## file: lib/Kconfig ## CONFIG_PACKING=y -CONFIG_CRC_CCITT=m -CONFIG_CRC16=y -CONFIG_CRC_T10DIF=y -CONFIG_CRC64_ROCKSOFT=y -CONFIG_CRC_ITU_T=m -CONFIG_CRC32=y -CONFIG_CRC64=y -CONFIG_CRC4=m -CONFIG_CRC7=m -CONFIG_LIBCRC32C=y -CONFIG_CRC8=m CONFIG_CRC_OPTIMIZATIONS=y # CONFIG_RANDOM32_SELFTEST is not set # CONFIG_GLOB_SELFTEST is not set @@ -2476,6 +2471,7 @@ CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y # CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B is not set +# CONFIG_OBJTOOL_WERROR is not set CONFIG_VMLINUX_MAP=y CONFIG_BUILTIN_MODULE_RANGES=y # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set @@ -2493,6 +2489,7 @@ CONFIG_DEBUG_FS_ALLOW_ALL=y # CONFIG_SHRINKER_DEBUG is not set # CONFIG_DEBUG_STACK_USAGE is not set CONFIG_SCHED_STACK_END_CHECK=y +# CONFIG_DEBUG_VFS is not set # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_VM_PGTABLE is not set # CONFIG_DEBUG_VIRTUAL is not set @@ -2509,9 +2506,9 @@ CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 # CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_DETECT_HUNG_TASK_BLOCKER=y # CONFIG_WQ_WATCHDOG is not set # CONFIG_TEST_LOCKUP is not set -# CONFIG_SCHED_DEBUG 
is not set CONFIG_SCHEDSTATS=y # CONFIG_PROVE_LOCKING is not set # CONFIG_LOCK_STAT is not set @@ -2565,14 +2562,6 @@ CONFIG_IO_STRICT_DEVMEM=y ## # CONFIG_UBSAN is not set -## -## file: lib/crypto/Kconfig -## -CONFIG_CRYPTO_LIB_CHACHA=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_POLY1305=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m - ## ## file: lib/fonts/Kconfig ## @@ -2620,7 +2609,6 @@ CONFIG_XZ_DEC_MICROLZMA=y ## file: mm/Kconfig ## CONFIG_SWAP=y -CONFIG_ZSMALLOC=m # CONFIG_ZSMALLOC_STAT is not set CONFIG_ZSMALLOC_CHAIN_SIZE=8 # CONFIG_SLUB_TINY is not set @@ -2657,6 +2645,7 @@ CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y # CONFIG_TRANSPARENT_HUGEPAGE_NEVER is not set ## end choice # CONFIG_READ_ONLY_THP_FOR_FS is not set +# CONFIG_NO_PAGE_MAPCOUNT is not set # CONFIG_CMA is not set CONFIG_MEM_SOFT_DIRTY=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y @@ -3315,8 +3304,6 @@ CONFIG_SECURITY_NETWORK_XFRM=y CONFIG_SECURITY_PATH=y CONFIG_INTEL_TXT=y CONFIG_LSM_MMAP_MIN_ADDR=65536 -CONFIG_HARDENED_USERCOPY=y -CONFIG_FORTIFY_SOURCE=y # CONFIG_STATIC_USERMODEHELPER is not set ## choice: First legacy 'major LSM' to be initialized # CONFIG_DEFAULT_SECURITY_SELINUX is not set @@ -3335,6 +3322,9 @@ CONFIG_INIT_STACK_ALL_ZERO=y CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y # CONFIG_INIT_ON_FREE_DEFAULT_ON is not set CONFIG_ZERO_CALL_USED_REGS=y +CONFIG_FORTIFY_SOURCE=y +CONFIG_HARDENED_USERCOPY=y +CONFIG_HARDENED_USERCOPY_DEFAULT_ON=y ## choice: Randomize layout of sensitive kernel structures CONFIG_RANDSTRUCT_NONE=y ## end choice @@ -3485,6 +3475,7 @@ CONFIG_ARCH_HAS_CPU_FINALIZE_INIT=y CONFIG_ARCH_HAS_CPU_PASID=y CONFIG_ARCH_HAS_CPU_RELAX=y CONFIG_ARCH_HAS_CRC32=y +CONFIG_ARCH_HAS_CRC64=y CONFIG_ARCH_HAS_CRC_T10DIF=y CONFIG_ARCH_HAS_CURRENT_STACK_POINTER=y CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y @@ -3494,6 +3485,7 @@ CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y CONFIG_ARCH_HAS_DMA_OPS=y CONFIG_ARCH_HAS_ELFCORE_COMPAT=y CONFIG_ARCH_HAS_ELF_RANDOMIZE=y +CONFIG_ARCH_HAS_EXECMEM_ROX=y CONFIG_ARCH_HAS_FAST_MULTIPLIER=y CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED=y CONFIG_ARCH_HAS_FORTIFY_SOURCE=y @@ -3511,6 +3503,7 @@ CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH=y CONFIG_ARCH_HAS_PKEYS=y CONFIG_ARCH_HAS_PMEM_API=y CONFIG_ARCH_HAS_PREEMPT_LAZY=y +CONFIG_ARCH_HAS_PTDUMP=y CONFIG_ARCH_HAS_PTE_DEVMAP=y CONFIG_ARCH_HAS_PTE_SPECIAL=y CONFIG_ARCH_HAS_SET_DIRECT_MAP=y @@ -3560,6 +3553,7 @@ CONFIG_ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP=y CONFIG_ARCH_SUPPORTS_LTO_CLANG=y CONFIG_ARCH_SUPPORTS_LTO_CLANG_THIN=y CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y +CONFIG_ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS=y CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y CONFIG_ARCH_SUPPORTS_PAGE_TABLE_CHECK=y CONFIG_ARCH_SUPPORTS_PER_VMA_LOCK=y @@ -3579,6 +3573,7 @@ CONFIG_ARCH_USE_MEMTEST=y CONFIG_ARCH_USE_QUEUED_RWLOCKS=y CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y CONFIG_ARCH_USE_SYM_ANNOTATIONS=y +CONFIG_ARCH_VMLINUX_NEEDS_RELOCS=y CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y CONFIG_ARCH_WANTS_NO_INSTR=y CONFIG_ARCH_WANTS_THP_SWAP=y @@ -3586,6 +3581,7 @@ CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_ARCH_WANT_GENERAL_HUGETLB=y +CONFIG_ARCH_WANT_HUGETLB_VMEMMAP_PREINIT=y CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_ARCH_WANT_LD_ORPHAN_WARN=y CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y @@ -3643,7 +3639,13 @@ CONFIG_CONTIG_ALLOC=y CONFIG_CPU_FREQ_GOV_ATTR_SET=y CONFIG_CPU_FREQ_GOV_COMMON=y CONFIG_CPU_RMAP=y +CONFIG_CRC16=y +CONFIG_CRC32=y CONFIG_CRC32_ARCH=y +CONFIG_CRC64=y +CONFIG_CRC64_ARCH=y +CONFIG_CRC_ITU_T=m +CONFIG_CRC_T10DIF=y CONFIG_CRC_T10DIF_ARCH=y 
CONFIG_CRYPTO_ACOMP2=y CONFIG_CRYPTO_AEAD=m @@ -3658,7 +3660,6 @@ CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519=y CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305=y CONFIG_CRYPTO_BLOWFISH_COMMON=m CONFIG_CRYPTO_CAST_COMMON=m -CONFIG_CRYPTO_CHACHA20_X86_64=m CONFIG_CRYPTO_CURVE25519_X86=m CONFIG_CRYPTO_DRBG=y CONFIG_CRYPTO_DRBG_HMAC=y @@ -3668,6 +3669,7 @@ CONFIG_CRYPTO_GENIV=m CONFIG_CRYPTO_HASH=y CONFIG_CRYPTO_HASH2=y CONFIG_CRYPTO_HASH_INFO=y +CONFIG_CRYPTO_HKDF=y CONFIG_CRYPTO_JITTERENTROPY_MEMORY_BLOCKS=64 CONFIG_CRYPTO_JITTERENTROPY_MEMORY_BLOCKSIZE=32 CONFIG_CRYPTO_KDF800108_CTR=y @@ -3676,14 +3678,11 @@ CONFIG_CRYPTO_KPP2=y CONFIG_CRYPTO_LIB_AES=y CONFIG_CRYPTO_LIB_AESGCM=y CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=y -CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m -CONFIG_CRYPTO_LIB_CHACHA_INTERNAL=m +CONFIG_CRYPTO_LIB_CURVE25519=m CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m CONFIG_CRYPTO_LIB_CURVE25519_INTERNAL=m CONFIG_CRYPTO_LIB_DES=m CONFIG_CRYPTO_LIB_GF128MUL=y -CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m -CONFIG_CRYPTO_LIB_POLY1305_INTERNAL=m CONFIG_CRYPTO_LIB_POLY1305_RSIZE=11 CONFIG_CRYPTO_LIB_SHA1=y CONFIG_CRYPTO_LIB_SHA256=y @@ -3691,7 +3690,6 @@ CONFIG_CRYPTO_LIB_UTILS=y CONFIG_CRYPTO_MANAGER2=y CONFIG_CRYPTO_NHPOLY1305=m CONFIG_CRYPTO_NULL2=y -CONFIG_CRYPTO_POLY1305_X86_64=m CONFIG_CRYPTO_POLYVAL=m CONFIG_CRYPTO_RNG=y CONFIG_CRYPTO_RNG2=y @@ -3706,6 +3704,7 @@ CONFIG_CRYPTO_SM4=m CONFIG_CRYPTO_TWOFISH_COMMON=m CONFIG_CRYPTO_USER_API=m CONFIG_CRYPTO_XCTR=m +CONFIG_CXL_MCE=y CONFIG_CXL_SUSPEND=y CONFIG_DCACHE_WORD_ACCESS=y CONFIG_DEBUG_INFO=y @@ -3792,6 +3791,7 @@ CONFIG_FUNCTION_ALIGNMENT_16B=y CONFIG_FUNCTION_ALIGNMENT_4B=y CONFIG_FUNCTION_PADDING_BYTES=16 CONFIG_FUNCTION_PADDING_CFI=11 +CONFIG_FUNCTION_TRACE_ARGS=y CONFIG_FUTEX_PI=y CONFIG_FWNODE_MDIO=m CONFIG_FW_LOADER_PAGED_BUF=y @@ -3824,12 +3824,12 @@ CONFIG_GENERIC_MSI_IRQ=y CONFIG_GENERIC_NET_UTILS=y CONFIG_GENERIC_PCI_IOMAP=y CONFIG_GENERIC_PENDING_IRQ=y -CONFIG_GENERIC_PTDUMP=y CONFIG_GENERIC_SMP_IDLE_THREAD=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y CONFIG_GENERIC_TIME_VSYSCALL=y CONFIG_GENERIC_TRACER=y +CONFIG_GENERIC_VDSO_DATA_STORE=y CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT=y CONFIG_GENERIC_VDSO_TIME_NS=y CONFIG_GET_FREE_REGION=y @@ -4032,6 +4032,7 @@ CONFIG_IOMMU_IOVA=y CONFIG_IOMMU_IO_PGTABLE=y CONFIG_IOMMU_MM_DATA=y CONFIG_IOMMU_SVA=y +CONFIG_IO_URING_ZCRX=y CONFIG_IO_WQ=y CONFIG_IPV6_FOU=m CONFIG_IPV6_FOU_TUNNEL=m @@ -4045,14 +4046,13 @@ CONFIG_IRQ_BYPASS_MANAGER=m CONFIG_IRQ_DOMAIN=y CONFIG_IRQ_DOMAIN_HIERARCHY=y CONFIG_IRQ_FORCED_THREADING=y -CONFIG_IRQ_MSI_IOMMU=y CONFIG_IRQ_WORK=y CONFIG_JBD2=y -CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y CONFIG_KERNFS=y CONFIG_KPROBES_ON_FTRACE=y CONFIG_KRETPROBES=y CONFIG_KRETPROBE_ON_RETHOOK=y +CONFIG_KVFREE_RCU_BATCHED=y CONFIG_KVM_ASYNC_PF=y CONFIG_KVM_COMMON=y CONFIG_KVM_COMPAT=y @@ -4064,6 +4064,7 @@ CONFIG_KVM_GENERIC_MMU_NOTIFIER=y CONFIG_KVM_GENERIC_PRE_FAULT_MEMORY=y CONFIG_KVM_GENERIC_PRIVATE_MEM=y CONFIG_KVM_MMIO=y +CONFIG_KVM_MMU_LOCKLESS_AGING=y CONFIG_KVM_PRIVATE_MEM=y CONFIG_KVM_VFIO=y CONFIG_KVM_X86=m @@ -4102,6 +4103,7 @@ CONFIG_MMU_GATHER_RCU_TABLE_FREE=y CONFIG_MMU_GATHER_TABLE_FREE=y CONFIG_MMU_LAZY_TLB_REFCOUNT=y CONFIG_MMU_NOTIFIER=y +CONFIG_MM_ID=y CONFIG_MODULES_TREE_LOOKUP=y CONFIG_MODULES_USE_ELF_RELA=y CONFIG_MODULE_SIG_FORMAT=y @@ -4187,6 +4189,7 @@ CONFIG_OUTPUT_FORMAT="elf64-x86-64" CONFIG_P2SB=y CONFIG_PADATA=y CONFIG_PAGE_COUNTER=y +CONFIG_PAGE_MAPCOUNT=y CONFIG_PAGE_POOL=y CONFIG_PAGE_SHIFT=12 CONFIG_PAGE_SIZE_LESS_THAN_256KB=y @@ -4197,7 +4200,6 @@ CONFIG_PARAVIRT_XXL=y 
CONFIG_PCIE_PME=y CONFIG_PCI_ATS=y CONFIG_PCI_DIRECT=y -CONFIG_PCI_DOE=y CONFIG_PCI_DOMAINS=y CONFIG_PCI_LABEL=y CONFIG_PCI_LOCKLESS_CONFIG=y @@ -4221,7 +4223,7 @@ CONFIG_PROC_CPU_RESCTRL=y CONFIG_PROC_PID_ARCH_STATUS=y CONFIG_PROC_THERMAL_MMIO_RAPL=m CONFIG_PSTORE_ZONE=m -CONFIG_PTDUMP_CORE=y +CONFIG_PTDUMP=y CONFIG_PTP_1588_CLOCK_OPTIONAL=m CONFIG_QUEUED_RWLOCKS=y CONFIG_QUEUED_SPINLOCKS=y @@ -4232,6 +4234,7 @@ CONFIG_RCU_STALL_COMMON=y CONFIG_REED_SOLOMON=m CONFIG_REED_SOLOMON_DEC8=y CONFIG_REED_SOLOMON_ENC8=y +CONFIG_RESCTRL_FS_PSEUDO_LOCK=y CONFIG_RETHOOK=y CONFIG_RING_BUFFER=y CONFIG_RPMSG=m @@ -4267,6 +4270,7 @@ CONFIG_SOFTIRQ_ON_OWN_STACK=y CONFIG_SPARSEMEM=y CONFIG_SPARSEMEM_EXTREME=y CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y +CONFIG_SPARSEMEM_VMEMMAP_PREINIT=y CONFIG_SPLIT_PMD_PTLOCKS=y CONFIG_SPLIT_PTE_PTLOCKS=y CONFIG_SQUASHFS_DECOMP_MULTI=y @@ -4327,7 +4331,6 @@ CONFIG_VDSO_GETRANDOM=y CONFIG_VFIO_IOMMU_TYPE1=m CONFIG_VFIO_PCI_CORE=m CONFIG_VFIO_PCI_INTX=y -CONFIG_VFIO_PCI_MMAP=y CONFIG_VFIO_VIRQFD=y CONFIG_VGASTATE=m CONFIG_VHOST=m @@ -4349,9 +4352,22 @@ CONFIG_X86=y CONFIG_X86_64=y CONFIG_X86_64_SMP=y CONFIG_X86_CMOV=y -CONFIG_X86_CMPXCHG64=y +CONFIG_X86_CX8=y CONFIG_X86_DEBUGCTLMSR=y CONFIG_X86_DIRECT_GBPAGES=y +CONFIG_X86_DISABLED_FEATURE_CALL_DEPTH=y +CONFIG_X86_DISABLED_FEATURE_CENTAUR_MCR=y +CONFIG_X86_DISABLED_FEATURE_CYRIX_ARR=y +CONFIG_X86_DISABLED_FEATURE_IBT=y +CONFIG_X86_DISABLED_FEATURE_K6_MTRR=y +CONFIG_X86_DISABLED_FEATURE_LA57=y +CONFIG_X86_DISABLED_FEATURE_LAM=y +CONFIG_X86_DISABLED_FEATURE_RETHUNK=y +CONFIG_X86_DISABLED_FEATURE_RETPOLINE=y +CONFIG_X86_DISABLED_FEATURE_RETPOLINE_LFENCE=y +CONFIG_X86_DISABLED_FEATURE_UNRET=y +CONFIG_X86_DISABLED_FEATURE_USER_SHSTK=y +CONFIG_X86_DISABLED_FEATURE_VME=y CONFIG_X86_HAVE_PAE=y CONFIG_X86_HV_CALLBACK_VECTOR=y CONFIG_X86_INTERNODE_CACHE_SHIFT=6 @@ -4362,6 +4378,18 @@ CONFIG_X86_MCE_THRESHOLD=y CONFIG_X86_MEM_ENCRYPT=y CONFIG_X86_MINIMUM_CPU_FAMILY=64 CONFIG_X86_NEED_RELOCS=y +CONFIG_X86_REQUIRED_FEATURE_ALWAYS=y +CONFIG_X86_REQUIRED_FEATURE_CMOV=y +CONFIG_X86_REQUIRED_FEATURE_CPUID=y +CONFIG_X86_REQUIRED_FEATURE_CX8=y +CONFIG_X86_REQUIRED_FEATURE_FPU=y +CONFIG_X86_REQUIRED_FEATURE_FXSR=y +CONFIG_X86_REQUIRED_FEATURE_LM=y +CONFIG_X86_REQUIRED_FEATURE_MSR=y +CONFIG_X86_REQUIRED_FEATURE_NOPL=y +CONFIG_X86_REQUIRED_FEATURE_PAE=y +CONFIG_X86_REQUIRED_FEATURE_XMM=y +CONFIG_X86_REQUIRED_FEATURE_XMM2=y CONFIG_X86_SPEEDSTEP_LIB=m CONFIG_X86_SUPPORTS_MEMORY_FAILURE=y CONFIG_X86_THERMAL_VECTOR=y diff --git a/debian/patches/bugfix/all/disable-some-marvell-phys.patch b/debian/patches/bugfix/all/disable-some-marvell-phys.patch index af7bca8..8dc7589 100644 --- a/debian/patches/bugfix/all/disable-some-marvell-phys.patch +++ b/debian/patches/bugfix/all/disable-some-marvell-phys.patch @@ -42,7 +42,7 @@ correctness. static int m88e1540_get_fld(struct phy_device *phydev, u8 *msecs) { -@@ -3848,6 +3852,7 @@ static struct phy_driver marvell_drivers +@@ -3828,6 +3832,7 @@ static struct phy_driver marvell_drivers .led_hw_control_set = m88e1318_led_hw_control_set, .led_hw_control_get = m88e1318_led_hw_control_get, }, @@ -50,7 +50,7 @@ correctness. { .phy_id = MARVELL_PHY_ID_88E1145, .phy_id_mask = MARVELL_PHY_ID_MASK, -@@ -3871,6 +3876,8 @@ static struct phy_driver marvell_drivers +@@ -3851,6 +3856,8 @@ static struct phy_driver marvell_drivers .cable_test_start = m88e1111_vct_cable_test_start, .cable_test_get_status = m88e1111_vct_cable_test_get_status, }, @@ -59,7 +59,7 @@ correctness. 
{ .phy_id = MARVELL_PHY_ID_88E1149R, .phy_id_mask = MARVELL_PHY_ID_MASK, -@@ -3889,6 +3896,8 @@ static struct phy_driver marvell_drivers +@@ -3869,6 +3876,8 @@ static struct phy_driver marvell_drivers .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, }, @@ -68,7 +68,7 @@ correctness. { .phy_id = MARVELL_PHY_ID_88E1240, .phy_id_mask = MARVELL_PHY_ID_MASK, -@@ -3909,6 +3918,7 @@ static struct phy_driver marvell_drivers +@@ -3889,6 +3898,7 @@ static struct phy_driver marvell_drivers .get_tunable = m88e1011_get_tunable, .set_tunable = m88e1011_set_tunable, }, @@ -76,7 +76,7 @@ correctness. { .phy_id = MARVELL_PHY_ID_88E1116R, .phy_id_mask = MARVELL_PHY_ID_MASK, -@@ -4197,9 +4207,9 @@ static const struct mdio_device_id __may +@@ -4177,9 +4187,9 @@ static const struct mdio_device_id __may { MARVELL_PHY_ID_88E1111_FINISAR, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E1118, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E1121R, MARVELL_PHY_ID_MASK }, diff --git a/debian/patches/bugfix/all/documentation-use-relative-source-paths-in-abi-documentation.patch b/debian/patches/bugfix/all/documentation-use-relative-source-paths-in-abi-documentation.patch deleted file mode 100644 index c03b727..0000000 --- a/debian/patches/bugfix/all/documentation-use-relative-source-paths-in-abi-documentation.patch +++ /dev/null @@ -1,32 +0,0 @@ -From: Ben Hutchings -Date: Mon, 16 Sep 2024 00:07:04 +0200 -Subject: Documentation: Use relative source filenames in ABI documentation - -Currently the ABI documentation files contain absolute source -filenames, which makes them unreproducible if the build directory can -vary. - -Remove the source base directory ($srctree) from the source filenames -shown in the documentation. - -Signed-off-by: Ben Hutchings ---- ---- a/Documentation/sphinx/kernel_abi.py -+++ b/Documentation/sphinx/kernel_abi.py -@@ -103,6 +103,7 @@ class KernelCmd(Directive): - lines = code_block + "\n\n" - - line_regex = re.compile(r"^\.\. LINENO (\S+)\#([0-9]+)$") -+ srctree = os.path.abspath(os.environ["srctree"]) - ln = 0 - n = 0 - f = fname -@@ -127,7 +128,7 @@ class KernelCmd(Directive): - # sphinx counts lines from 0 - ln = int(match.group(2)) - 1 - else: -- content.append(line, f, ln) -+ content.append(line, os.path.relpath(f, srctree), ln) - - kernellog.info(self.state.document.settings.env.app, "%s: parsed %i lines" % (fname, n)) - diff --git a/debian/patches/bugfix/all/kbuild-fix-recordmcount-dependency.patch b/debian/patches/bugfix/all/kbuild-fix-recordmcount-dependency.patch index 28ea6d3..f4270fe 100644 --- a/debian/patches/bugfix/all/kbuild-fix-recordmcount-dependency.patch +++ b/debian/patches/bugfix/all/kbuild-fix-recordmcount-dependency.patch @@ -9,7 +9,7 @@ sources. 
--- a/scripts/Makefile.build +++ b/scripts/Makefile.build -@@ -188,6 +188,11 @@ cmd_record_mcount = $(if $(findstring $( +@@ -184,6 +184,11 @@ cmd_record_mcount = $(if $(findstring $( $(sub_cmd_record_mcount)) endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT diff --git a/debian/patches/bugfix/all/perf-tools-pass-extra_cflags-through-to-libbpf-build-again.patch b/debian/patches/bugfix/all/perf-tools-pass-extra_cflags-through-to-libbpf-build-again.patch index ba1f741..7957c7f 100644 --- a/debian/patches/bugfix/all/perf-tools-pass-extra_cflags-through-to-libbpf-build-again.patch +++ b/debian/patches/bugfix/all/perf-tools-pass-extra_cflags-through-to-libbpf-build-again.patch @@ -16,7 +16,7 @@ Signed-off-by: Ben Hutchings --- --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf -@@ -945,7 +945,7 @@ $(LIBAPI)-clean: +@@ -963,7 +963,7 @@ $(LIBAPI)-clean: $(LIBBPF): FORCE | $(LIBBPF_OUTPUT) $(Q)$(MAKE) -C $(LIBBPF_DIR) FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) \ O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= subdir= \ diff --git a/debian/patches/bugfix/all/tools-build-remove-bpf-run-time-check-at-build-time.patch b/debian/patches/bugfix/all/tools-build-remove-bpf-run-time-check-at-build-time.patch index 3c5e7e7..05e6a60 100644 --- a/debian/patches/bugfix/all/tools-build-remove-bpf-run-time-check-at-build-time.patch +++ b/debian/patches/bugfix/all/tools-build-remove-bpf-run-time-check-at-build-time.patch @@ -21,7 +21,7 @@ Signed-off-by: Ben Hutchings + * much older kernel. Do "use" the attr structure here to avoid + * a "set but not used" warning. */ -- return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); +- return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)) == 0; + (void)&attr; + return 0; } diff --git a/debian/patches/bugfix/all/tools-perf-fix-missing-ldflags-for-some-programs.patch b/debian/patches/bugfix/all/tools-perf-fix-missing-ldflags-for-some-programs.patch index 3bec90c..a4ea77a 100644 --- a/debian/patches/bugfix/all/tools-perf-fix-missing-ldflags-for-some-programs.patch +++ b/debian/patches/bugfix/all/tools-perf-fix-missing-ldflags-for-some-programs.patch @@ -6,7 +6,7 @@ Signed-off-by: Ben Hutchings --- --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf -@@ -919,7 +919,7 @@ $(OUTPUT)dlfilters/%.o: dlfilters/%.c in +@@ -937,7 +937,7 @@ $(OUTPUT)dlfilters/%.o: dlfilters/%.c in .SECONDARY: $(DLFILTERS:.so=.o) $(OUTPUT)dlfilters/%.so: $(OUTPUT)dlfilters/%.o diff --git a/debian/patches/debian/add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by-default.patch b/debian/patches/debian/add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by-default.patch index a9f3fb3..1065ee0 100644 --- a/debian/patches/debian/add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by-default.patch +++ b/debian/patches/debian/add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by-default.patch @@ -1,7 +1,7 @@ From: Serge Hallyn Date: Fri, 31 May 2013 19:12:12 +0000 (+0100) Subject: add sysctl to disallow unprivileged CLONE_NEWUSER by default -Origin: https://kernel.ubuntu.com/git?p=serge%2Fubuntu-saucy.git;a=commit;h=5c847404dcb2e3195ad0057877e1422ae90892b8 +Origin: http://kernel.ubuntu.com/git?p=serge%2Fubuntu-saucy.git;a=commit;h=5c847404dcb2e3195ad0057877e1422ae90892b8 add sysctl to disallow unprivileged CLONE_NEWUSER by default @@ -34,7 +34,7 @@ Signed-off-by: Serge Hallyn /* * Minimum number of threads to boot the kernel */ -@@ -2172,6 +2178,10 @@ __latent_entropy struct task_struct *cop +@@ -2194,6 +2200,10 @@ __latent_entropy struct task_struct *cop if ((clone_flags & 
(CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) return ERR_PTR(-EINVAL); @@ -45,7 +45,7 @@ Signed-off-by: Serge Hallyn /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. -@@ -3325,6 +3335,12 @@ int ksys_unshare(unsigned long unshare_f +@@ -3354,6 +3364,12 @@ int ksys_unshare(unsigned long unshare_f if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; @@ -60,18 +60,18 @@ Signed-off-by: Serge Hallyn goto bad_unshare_out; --- a/kernel/sysctl.c +++ b/kernel/sysctl.c -@@ -135,6 +135,10 @@ static enum sysctl_writes_mode sysctl_wr - int sysctl_legacy_va_layout; - #endif +@@ -84,6 +84,10 @@ EXPORT_SYMBOL_GPL(sysctl_long_vals); + static const int ngroups_max = NGROUPS_MAX; + static const int cap_last_cap = CAP_LAST_CAP; +#ifdef CONFIG_USER_NS +extern int unprivileged_userns_clone; +#endif + - #endif /* CONFIG_SYSCTL */ + #ifdef CONFIG_PROC_SYSCTL - /* -@@ -1617,6 +1621,15 @@ static const struct ctl_table kern_table + /** +@@ -1595,6 +1599,15 @@ static const struct ctl_table kern_table .mode = 0644, .proc_handler = proc_dointvec, }, diff --git a/debian/patches/debian/android-enable-building-ashmem-and-binder-as-modules.patch b/debian/patches/debian/android-enable-building-ashmem-and-binder-as-modules.patch index b6af589..28a4e29 100644 --- a/debian/patches/debian/android-enable-building-ashmem-and-binder-as-modules.patch +++ b/debian/patches/debian/android-enable-building-ashmem-and-binder-as-modules.patch @@ -80,7 +80,7 @@ Consequently, the ashmem part of this patch has been removed. { --- a/mm/memory.c +++ b/mm/memory.c -@@ -6392,6 +6392,7 @@ inval: +@@ -6589,6 +6589,7 @@ inval: count_vm_vma_lock_event(VMA_LOCK_ABORT); return NULL; } diff --git a/debian/patches/debian/dccp-disable-auto-loading-as-mitigation-against-local-exploits.patch b/debian/patches/debian/dccp-disable-auto-loading-as-mitigation-against-local-exploits.patch index 9202fcb..48b11e6 100644 --- a/debian/patches/debian/dccp-disable-auto-loading-as-mitigation-against-local-exploits.patch +++ b/debian/patches/debian/dccp-disable-auto-loading-as-mitigation-against-local-exploits.patch @@ -15,7 +15,7 @@ Signed-off-by: Ben Hutchings --- --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c -@@ -1099,8 +1099,8 @@ module_exit(dccp_v4_exit); +@@ -1094,8 +1094,8 @@ module_exit(dccp_v4_exit); * values directly, Also cover the case where the protocol is not specified, * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP */ @@ -28,7 +28,7 @@ Signed-off-by: Ben Hutchings MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c -@@ -1174,8 +1174,8 @@ module_exit(dccp_v6_exit); +@@ -1167,8 +1167,8 @@ module_exit(dccp_v6_exit); * values directly, Also cover the case where the protocol is not specified, * i.e. net-pf-PF_INET6-proto-0-type-SOCK_DCCP */ diff --git a/debian/patches/debian/export-symbols-needed-by-android-drivers.patch b/debian/patches/debian/export-symbols-needed-by-android-drivers.patch index 58244a3..68de285 100644 --- a/debian/patches/debian/export-symbols-needed-by-android-drivers.patch +++ b/debian/patches/debian/export-symbols-needed-by-android-drivers.patch @@ -22,7 +22,7 @@ Export the currently un-exported symbols it depends on. --- a/fs/file.c +++ b/fs/file.c -@@ -845,6 +845,7 @@ struct file *file_close_fd(unsigned int +@@ -843,6 +843,7 @@ struct file *file_close_fd(unsigned int return file; } @@ -82,7 +82,7 @@ Export the currently un-exported symbols it depends on. 
* task_work_cancel_match - cancel a pending work added by task_work_add() --- a/mm/memory.c +++ b/mm/memory.c -@@ -2027,6 +2027,7 @@ void zap_page_range_single(struct vm_are +@@ -2020,6 +2020,7 @@ void zap_page_range_single(struct vm_are tlb_finish_mmu(&tlb); hugetlb_zap_end(vma, details); } diff --git a/debian/patches/debian/fanotify-taint-on-use-of-fanotify_access_permissions.patch b/debian/patches/debian/fanotify-taint-on-use-of-fanotify_access_permissions.patch index 6634997..f14ead9 100644 --- a/debian/patches/debian/fanotify-taint-on-use-of-fanotify_access_permissions.patch +++ b/debian/patches/debian/fanotify-taint-on-use-of-fanotify_access_permissions.patch @@ -12,7 +12,7 @@ actually used. --- --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c -@@ -1838,6 +1838,14 @@ static int do_fanotify_mark(int fanotify +@@ -1881,6 +1881,14 @@ static int do_fanotify_mark(int fanotify umask = FANOTIFY_EVENT_FLAGS; } diff --git a/debian/patches/debian/firmware_loader-log-direct-loading-failures-as-info-for-d-i.patch b/debian/patches/debian/firmware_loader-log-direct-loading-failures-as-info-for-d-i.patch new file mode 100644 index 0000000..f9b02c4 --- /dev/null +++ b/debian/patches/debian/firmware_loader-log-direct-loading-failures-as-info-for-d-i.patch @@ -0,0 +1,37 @@ +From: Ben Hutchings +Subject: firmware_loader: Log direct loading failures as info for d-i +Date: Thu, 30 May 2024 13:14:32 +0100 +Forwarded: not-needed + +On an installed Debian system, firmware packages will normally be +installed automatically based on a mapping of device IDs to firmware. +Within the Debian installer this has not yet happened and we need a +way to detect missing firmware. + +Although many/most drivers log firmware loading failures, they do so +using many different formats. This adds a single log message to the +firmware loader, which the installer's hw-detect package will look +for. The log level is set to "info" because some failures are +expected and we do not want to confuse users with bogus error messages +(like in bug #966218). + +NOTE: The log message format must not be changed without coordinating +this with the check-missing-firmware.sh in hw-detect. 
+--- + drivers/base/firmware_loader/fallback.c | 2 +- + drivers/base/firmware_loader/main.c | 17 ++++++++--------- + 2 files changed, 9 insertions(+), 10 deletions(-) + +--- a/drivers/base/firmware_loader/main.c ++++ b/drivers/base/firmware_loader/main.c +@@ -590,6 +590,10 @@ fw_get_filesystem_firmware(struct device + } + __putname(path); + ++ if (rc) ++ dev_info(device, "firmware: failed to load %s (%d)\n", ++ fw_priv->fw_name, rc); ++ + return rc; + } + diff --git a/debian/patches/debian/fs-enable-link-security-restrictions-by-default.patch b/debian/patches/debian/fs-enable-link-security-restrictions-by-default.patch index f8a9733..58af8c1 100644 --- a/debian/patches/debian/fs-enable-link-security-restrictions-by-default.patch +++ b/debian/patches/debian/fs-enable-link-security-restrictions-by-default.patch @@ -9,7 +9,7 @@ This reverts commit 561ec64ae67ef25cac8d72bb9c4bfc955edfd415 --- a/fs/namei.c +++ b/fs/namei.c -@@ -1094,8 +1094,8 @@ static inline void put_link(struct namei +@@ -1095,8 +1095,8 @@ static inline void put_link(struct namei path_put(&last->link); } diff --git a/debian/patches/debian/kernelvariables.patch b/debian/patches/debian/kernelvariables.patch index 9f5c928..277765a 100644 --- a/debian/patches/debian/kernelvariables.patch +++ b/debian/patches/debian/kernelvariables.patch @@ -19,7 +19,7 @@ use of $(ARCH) needs to be moved after this. --- --- a/Makefile +++ b/Makefile -@@ -405,36 +405,6 @@ include $(srctree)/scripts/subarch.inclu +@@ -402,36 +402,6 @@ include $(srctree)/scripts/subarch.inclu # Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile ARCH ?= $(SUBARCH) @@ -56,7 +56,7 @@ use of $(ARCH) needs to be moved after this. KCONFIG_CONFIG ?= .config export KCONFIG_CONFIG -@@ -554,6 +524,35 @@ RUSTFLAGS_KERNEL = +@@ -551,6 +521,35 @@ RUSTFLAGS_KERNEL = AFLAGS_KERNEL = LDFLAGS_vmlinux = diff --git a/debian/patches/debian/linux-perf-remove-remaining-source-filenames-from-executable.patch b/debian/patches/debian/linux-perf-remove-remaining-source-filenames-from-executable.patch index e2b49c6..973b59c 100644 --- a/debian/patches/debian/linux-perf-remove-remaining-source-filenames-from-executable.patch +++ b/debian/patches/debian/linux-perf-remove-remaining-source-filenames-from-executable.patch @@ -15,7 +15,7 @@ to the installed location. --- --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c -@@ -660,10 +660,12 @@ static int report__browse_hists(struct r +@@ -666,10 +666,12 @@ static int report__browse_hists(struct r path = system_path(TIPDIR); if (perf_tip(&help, path) || help == NULL) { diff --git a/debian/patches/debian/makefile-make-compiler-version-comparison-optional.patch b/debian/patches/debian/makefile-make-compiler-version-comparison-optional.patch index 6863d58..d81b090 100644 --- a/debian/patches/debian/makefile-make-compiler-version-comparison-optional.patch +++ b/debian/patches/debian/makefile-make-compiler-version-comparison-optional.patch @@ -20,7 +20,7 @@ is non-empty. 
--- --- a/Makefile +++ b/Makefile -@@ -1871,7 +1871,7 @@ PHONY += prepare +@@ -1882,7 +1882,7 @@ PHONY += prepare # now expand this into a simple variable to reduce the cost of shell evaluations prepare: CC_VERSION_TEXT := $(CC_VERSION_TEXT) prepare: diff --git a/debian/patches/debian/tools-perf-install-python-bindings.patch b/debian/patches/debian/tools-perf-install-python-bindings.patch index 97b5ad3..f6d1ea6 100644 --- a/debian/patches/debian/tools-perf-install-python-bindings.patch +++ b/debian/patches/debian/tools-perf-install-python-bindings.patch @@ -1,7 +1,7 @@ From: Adriaan Schmidt Date: Mon, 4 Apr 2022 13:38:33 +0200 Subject: tools: install perf python bindings -Bug-Debian: https://bugs.debian.org/860957 +Bug-Debian: http://bugs.debian.org/860957 Forwarded: not-needed --- @@ -10,7 +10,7 @@ Forwarded: not-needed --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf -@@ -1139,7 +1139,7 @@ install-bin: install-tools install-tests +@@ -1157,7 +1157,7 @@ install-bin: install-tools install-tests install: install-bin try-install-man install-python_ext: diff --git a/debian/patches/debian/tools-perf-perf-read-vdso-in-libexec.patch b/debian/patches/debian/tools-perf-perf-read-vdso-in-libexec.patch index 7c996c7..f66d54e 100644 --- a/debian/patches/debian/tools-perf-perf-read-vdso-in-libexec.patch +++ b/debian/patches/debian/tools-perf-perf-read-vdso-in-libexec.patch @@ -4,7 +4,7 @@ Subject: linux-tools: Install perf-read-vdso{,x}32 in directory under /usr/lib --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf -@@ -1067,21 +1067,21 @@ install-tools: all install-gtk +@@ -1085,21 +1085,21 @@ install-tools: all install-gtk $(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(includedir_SQ)/perf'; \ $(INSTALL) -m 644 include/perf/perf_dlfilter.h -t '$(DESTDIR_SQ)$(includedir_SQ)/perf' diff --git a/debian/patches/debian/uname-version-timestamp.patch b/debian/patches/debian/uname-version-timestamp.patch index 28e8d1d..b72633c 100644 --- a/debian/patches/debian/uname-version-timestamp.patch +++ b/debian/patches/debian/uname-version-timestamp.patch @@ -13,7 +13,7 @@ $KBUILD_BUILD_TIMESTAMP. 
--- a/init/Makefile +++ b/init/Makefile -@@ -29,7 +29,7 @@ preempt-flag-$(CONFIG_PREEMPT_DYNAMIC) : +@@ -30,7 +30,7 @@ preempt-flag-$(CONFIG_PREEMPT_DYNAMIC) : preempt-flag-$(CONFIG_PREEMPT_RT) := PREEMPT_RT build-version = $(or $(KBUILD_BUILD_VERSION), $(build-version-auto)) diff --git a/debian/patches/debian/yama-disable-by-default.patch b/debian/patches/debian/yama-disable-by-default.patch index 4dcdfc4..793a887 100644 --- a/debian/patches/debian/yama-disable-by-default.patch +++ b/debian/patches/debian/yama-disable-by-default.patch @@ -19,7 +19,7 @@ Forwarded: not-needed /* describe a ptrace relationship for potential exception */ struct ptrace_relation { -@@ -474,7 +474,7 @@ static inline void yama_init_sysctl(void +@@ -469,7 +469,7 @@ static inline void yama_init_sysctl(void static int __init yama_init(void) { diff --git a/debian/patches/features/all/lockdown/efi-add-an-efi_secure_boot-flag-to-indicate-secure-b.patch b/debian/patches/features/all/lockdown/efi-add-an-efi_secure_boot-flag-to-indicate-secure-b.patch index 71a0764..ded3d69 100644 --- a/debian/patches/features/all/lockdown/efi-add-an-efi_secure_boot-flag-to-indicate-secure-b.patch +++ b/debian/patches/features/all/lockdown/efi-add-an-efi_secure_boot-flag-to-indicate-secure-b.patch @@ -31,7 +31,7 @@ cc: linux-efi@vger.kernel.org --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c -@@ -1073,19 +1073,7 @@ void __init setup_arch(char **cmdline_p) +@@ -1127,19 +1127,7 @@ void __init setup_arch(char **cmdline_p) /* Allocate bigger log buffer */ setup_log_buf(1); diff --git a/debian/patches/features/all/lockdown/efi-lock-down-the-kernel-if-booted-in-secure-boot-mo.patch b/debian/patches/features/all/lockdown/efi-lock-down-the-kernel-if-booted-in-secure-boot-mo.patch index 867ac78..36e9bde 100644 --- a/debian/patches/features/all/lockdown/efi-lock-down-the-kernel-if-booted-in-secure-boot-mo.patch +++ b/debian/patches/features/all/lockdown/efi-lock-down-the-kernel-if-booted-in-secure-boot-mo.patch @@ -26,7 +26,7 @@ Signed-off-by: Salvatore Bonaccorso --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c -@@ -907,6 +907,8 @@ void __init setup_arch(char **cmdline_p) +@@ -964,6 +964,8 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled(EFI_BOOT)) efi_init(); @@ -35,7 +35,7 @@ Signed-off-by: Salvatore Bonaccorso reserve_ibft_region(); x86_init.resources.dmi_setup(); -@@ -1073,8 +1075,6 @@ void __init setup_arch(char **cmdline_p) +@@ -1127,8 +1129,6 @@ void __init setup_arch(char **cmdline_p) /* Allocate bigger log buffer */ setup_log_buf(1); diff --git a/debian/patches/features/all/security-perf-allow-further-restriction-of-perf_event_open.patch b/debian/patches/features/all/security-perf-allow-further-restriction-of-perf_event_open.patch index 2227d5d..35df3e2 100644 --- a/debian/patches/features/all/security-perf-allow-further-restriction-of-perf_event_open.patch +++ b/debian/patches/features/all/security-perf-allow-further-restriction-of-perf_event_open.patch @@ -22,9 +22,9 @@ Signed-off-by: Ben Hutchings --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h -@@ -1701,6 +1701,11 @@ int perf_cpu_time_max_percent_handler(co - int perf_event_max_stack_handler(const struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); +@@ -1684,6 +1684,11 @@ extern int sysctl_perf_event_sample_rate + + extern void perf_sample_event_took(u64 sample_len_ns); +static inline bool perf_paranoid_any(void) +{ @@ -36,7 +36,7 @@ Signed-off-by: Ben Hutchings --- a/kernel/events/core.c +++ 
b/kernel/events/core.c -@@ -449,8 +449,13 @@ static struct kmem_cache *perf_event_cac +@@ -450,8 +450,13 @@ static struct kmem_cache *perf_event_cac * 0 - disallow raw tracepoint access for unpriv * 1 - disallow cpu events for unpriv * 2 - disallow kernel profiling for unpriv @@ -48,9 +48,9 @@ Signed-off-by: Ben Hutchings int sysctl_perf_event_paranoid __read_mostly = 2; +#endif - /* Minimum for 512 kiB + 1 user control page */ - int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */ -@@ -12813,6 +12818,9 @@ SYSCALL_DEFINE5(perf_event_open, + /* Minimum for 512 kiB + 1 user control page. 'free' kiB per user. */ + static int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); +@@ -13084,6 +13089,9 @@ SYSCALL_DEFINE5(perf_event_open, if (err) return err; @@ -58,13 +58,13 @@ Signed-off-by: Ben Hutchings + return -EACCES; + /* Do we allow access to perf_event_open(2) ? */ - err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); + err = security_perf_event_open(PERF_SECURITY_OPEN); if (err) --- a/security/Kconfig +++ b/security/Kconfig -@@ -51,6 +51,15 @@ config PROC_MEM_NO_FORCE - - endchoice +@@ -72,6 +72,15 @@ config MSEAL_SYSTEM_MAPPINGS + For complete descriptions of memory sealing, please see + Documentation/userspace-api/mseal.rst +config SECURITY_PERF_EVENTS_RESTRICT + bool "Restrict unprivileged use of performance events" diff --git a/debian/patches/features/x86/intel-iommu-add-option-to-exclude-integrated-gpu-only.patch b/debian/patches/features/x86/intel-iommu-add-option-to-exclude-integrated-gpu-only.patch index c2ed722..2347d08 100644 --- a/debian/patches/features/x86/intel-iommu-add-option-to-exclude-integrated-gpu-only.patch +++ b/debian/patches/features/x86/intel-iommu-add-option-to-exclude-integrated-gpu-only.patch @@ -22,7 +22,7 @@ Signed-off-by: Ben Hutchings --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -2264,6 +2264,8 @@ +@@ -2288,6 +2288,8 @@ bypassed by not enabling DMAR with this option. In this case, gfx device will use physical address for DMA. @@ -68,7 +68,7 @@ Signed-off-by: Ben Hutchings } else if (!strncmp(str, "forcedac", 8)) { pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n"); iommu_dma_forcedac = true; -@@ -1902,6 +1910,9 @@ static int device_def_domain_type(struct +@@ -1935,6 +1943,9 @@ static int device_def_domain_type(struct if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) return IOMMU_DOMAIN_IDENTITY; @@ -78,7 +78,7 @@ Signed-off-by: Ben Hutchings } return 0; -@@ -2196,6 +2207,9 @@ static int __init init_dmars(void) +@@ -2229,6 +2240,9 @@ static int __init init_dmars(void) iommu_set_root_entry(iommu); } diff --git a/debian/patches/features/x86/x86-make-x32-syscall-support-conditional.patch b/debian/patches/features/x86/x86-make-x32-syscall-support-conditional.patch index edc964d..dc9e63f 100644 --- a/debian/patches/features/x86/x86-make-x32-syscall-support-conditional.patch +++ b/debian/patches/features/x86/x86-make-x32-syscall-support-conditional.patch @@ -29,7 +29,7 @@ Signed-off-by: Ben Hutchings --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -7004,6 +7004,10 @@ +@@ -7044,6 +7044,10 @@ later by a loaded module cannot be set this way. 
Example: sysctl.vm.swappiness=40 @@ -42,7 +42,7 @@ Signed-off-by: Ben Hutchings Ignore sysrq setting - this boot parameter will --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig -@@ -3202,6 +3202,14 @@ config COMPAT_32 +@@ -3169,6 +3169,14 @@ config COMPAT_32 select HAVE_UID16 select OLD_SIGSUSPEND3 @@ -57,9 +57,70 @@ Signed-off-by: Ben Hutchings config COMPAT def_bool y depends on IA32_EMULATION || X86_X32_ABI ---- a/arch/x86/entry/common.c -+++ b/arch/x86/entry/common.c -@@ -64,7 +64,7 @@ static __always_inline bool do_syscall_x +--- a/arch/x86/include/asm/elf.h ++++ b/arch/x86/include/asm/elf.h +@@ -12,6 +12,9 @@ + #include + #include + #include ++#ifndef COMPILE_OFFSETS /* avoid a circular dependency on asm-offsets.h */ ++#include ++#endif + + typedef unsigned long elf_greg_t; + +@@ -152,7 +155,8 @@ do { \ + + #define compat_elf_check_arch(x) \ + ((elf_check_arch_ia32(x) && ia32_enabled_verbose()) || \ +- (IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64)) ++ (IS_ENABLED(CONFIG_X86_X32_ABI) && x32_enabled && \ ++ (x)->e_machine == EM_X86_64)) + + static inline void elf_common_init(struct thread_struct *t, + struct pt_regs *regs, const u16 ds) +--- a/arch/x86/include/asm/syscall.h ++++ b/arch/x86/include/asm/syscall.h +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + #include /* for TS_COMPAT */ + #include + +@@ -28,6 +29,18 @@ extern long ia32_sys_call(const struct p + extern long x32_sys_call(const struct pt_regs *, unsigned int nr); + extern long x64_sys_call(const struct pt_regs *, unsigned int nr); + ++#if defined(CONFIG_X86_X32_ABI) ++#if defined(CONFIG_X86_X32_DISABLED) ++DECLARE_STATIC_KEY_FALSE(x32_enabled_skey); ++#define x32_enabled static_branch_unlikely(&x32_enabled_skey) ++#else ++DECLARE_STATIC_KEY_TRUE(x32_enabled_skey); ++#define x32_enabled static_branch_likely(&x32_enabled_skey) ++#endif ++#else ++#define x32_enabled 0 ++#endif ++ + /* + * Only the low 32 bits of orig_ax are meaningful, so we return int. + * This importantly ignores the high bits on 64-bit, so comparisons +--- a/arch/x86/entry/syscall_64.c ++++ b/arch/x86/entry/syscall_64.c +@@ -7,6 +7,9 @@ + #include + #include + #include ++#include ++#undef MODULE_PARAM_PREFIX ++#define MODULE_PARAM_PREFIX "syscall." + #include + + #define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *); +@@ -75,7 +78,7 @@ static __always_inline bool do_syscall_x */ unsigned int xnr = nr - __X32_SYSCALL_BIT; @@ -68,23 +129,12 @@ Signed-off-by: Ben Hutchings xnr = array_index_nospec(xnr, X32_NR_syscalls); regs->ax = x32_sys_call(regs, xnr); return true; ---- a/arch/x86/entry/syscall_x32.c -+++ b/arch/x86/entry/syscall_x32.c -@@ -4,6 +4,9 @@ - #include - #include - #include -+#include -+#undef MODULE_PARAM_PREFIX -+#define MODULE_PARAM_PREFIX "syscall." 
- #include - #include - -@@ -23,3 +26,46 @@ long x32_sys_call(const struct pt_regs * - default: return __x64_sys_ni_syscall(regs); - } - }; +@@ -139,3 +142,48 @@ __visible noinstr bool do_syscall_64(str + /* Use SYSRET to exit to userspace */ + return true; + } + ++#ifdef CONFIG_X86_X32_ABI +/* Maybe enable x32 syscalls */ + +#if defined(CONFIG_X86_X32_DISABLED) @@ -127,54 +177,4 @@ Signed-off-by: Ben Hutchings +}; + +arch_param_cb(x32, &x32_param_ops, NULL, 0444); ---- a/arch/x86/include/asm/elf.h -+++ b/arch/x86/include/asm/elf.h -@@ -12,6 +12,9 @@ - #include - #include - #include -+#ifndef COMPILE_OFFSETS /* avoid a circular dependency on asm-offsets.h */ -+#include +#endif - - typedef unsigned long elf_greg_t; - -@@ -151,7 +154,8 @@ do { \ - - #define compat_elf_check_arch(x) \ - ((elf_check_arch_ia32(x) && ia32_enabled_verbose()) || \ -- (IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64)) -+ (IS_ENABLED(CONFIG_X86_X32_ABI) && x32_enabled && \ -+ (x)->e_machine == EM_X86_64)) - - static inline void elf_common_init(struct thread_struct *t, - struct pt_regs *regs, const u16 ds) ---- a/arch/x86/include/asm/syscall.h -+++ b/arch/x86/include/asm/syscall.h -@@ -13,6 +13,7 @@ - #include - #include - #include -+#include - #include /* for TS_COMPAT */ - #include - -@@ -28,6 +29,18 @@ extern long ia32_sys_call(const struct p - extern long x32_sys_call(const struct pt_regs *, unsigned int nr); - extern long x64_sys_call(const struct pt_regs *, unsigned int nr); - -+#if defined(CONFIG_X86_X32_ABI) -+#if defined(CONFIG_X86_X32_DISABLED) -+DECLARE_STATIC_KEY_FALSE(x32_enabled_skey); -+#define x32_enabled static_branch_unlikely(&x32_enabled_skey) -+#else -+DECLARE_STATIC_KEY_TRUE(x32_enabled_skey); -+#define x32_enabled static_branch_likely(&x32_enabled_skey) -+#endif -+#else -+#define x32_enabled 0 -+#endif -+ - /* - * Only the low 32 bits of orig_ax are meaningful, so we return int. - * This importantly ignores the high bits on 64-bit, so comparisons diff --git a/debian/patches/krd/0001-Revert-objtool-dont-fail-the-kernel-build-on-fatal-errors.patch b/debian/patches/krd/0001-Revert-objtool-dont-fail-the-kernel-build-on-fatal-errors.patch deleted file mode 100644 index 70294ce..0000000 --- a/debian/patches/krd/0001-Revert-objtool-dont-fail-the-kernel-build-on-fatal-errors.patch +++ /dev/null @@ -1,52 +0,0 @@ -this reverts following commit: - - From: Josh Poimboeuf - Date: Thu, 14 Jan 2021 16:32:42 -0600 - Subject: objtool: Don't fail the kernel build on fatal errors - - [ Upstream commit 655cf86548a3938538642a6df27dd359e13c86bd ] - - This is basically a revert of commit 644592d32837 ("objtool: Fail the - kernel build on fatal errors"). - - That change turned out to be more trouble than it's worth. Failing the - build is an extreme measure which sometimes gets too much attention and - blocks CI build testing. - - These fatal-type warnings aren't yet as rare as we'd hope, due to the - ever-increasing matrix of supported toolchains/plugins and their - fast-changing nature as of late. - - Also, there are more people (and bots) looking for objtool warnings than - ever before, so even non-fatal warnings aren't likely to be ignored for - long. 
- - Suggested-by: Nick Desaulniers - Reviewed-by: Miroslav Benes - Reviewed-by: Nick Desaulniers - Reviewed-by: Kamalesh Babulal - Signed-off-by: Josh Poimboeuf - Signed-off-by: Sasha Levin - ---- a/tools/objtool/check.c -+++ b/tools/objtool/check.c -@@ -4783,10 +4783,14 @@ int check(struct objtool_file *file) - } - - out: -- /* -- * For now, don't fail the kernel build on fatal warnings. These -- * errors are still fairly common due to the growing matrix of -- * supported toolchains and their recent pace of change. -- */ -+ if (ret < 0) { -+ /* -+ * Fatal error. The binary is corrupt or otherwise broken in -+ * some way, or objtool itself is broken. Fail the kernel -+ * build. -+ */ -+ return ret; -+ } -+ - return 0; - } diff --git a/debian/patches/krd/0002-established-timeout.patch b/debian/patches/krd/0001-established-timeout.patch similarity index 100% rename from debian/patches/krd/0002-established-timeout.patch rename to debian/patches/krd/0001-established-timeout.patch diff --git a/debian/patches/krd/0003-local-ports.patch b/debian/patches/krd/0002-local-ports.patch similarity index 100% rename from debian/patches/krd/0003-local-ports.patch rename to debian/patches/krd/0002-local-ports.patch diff --git a/debian/patches/krd/0004-bridge-group_fwd_mask.patch b/debian/patches/krd/0003-bridge-group_fwd_mask.patch similarity index 100% rename from debian/patches/krd/0004-bridge-group_fwd_mask.patch rename to debian/patches/krd/0003-bridge-group_fwd_mask.patch diff --git a/debian/patches/krd/0005-certs-genkey.patch b/debian/patches/krd/0004-certs-genkey.patch similarity index 100% rename from debian/patches/krd/0005-certs-genkey.patch rename to debian/patches/krd/0004-certs-genkey.patch diff --git a/debian/patches/mixed-arch/0001-ZEN-Add-graysky-s-more-ISA-levels-and-uarches.patch b/debian/patches/mixed-arch/0001-graysky2-more-ISA-levels-and-uarches.patch similarity index 55% rename from debian/patches/mixed-arch/0001-ZEN-Add-graysky-s-more-ISA-levels-and-uarches.patch rename to debian/patches/mixed-arch/0001-graysky2-more-ISA-levels-and-uarches.patch index e56b2ec..a2f8c87 100644 --- a/debian/patches/mixed-arch/0001-ZEN-Add-graysky-s-more-ISA-levels-and-uarches.patch +++ b/debian/patches/mixed-arch/0001-graysky2-more-ISA-levels-and-uarches.patch @@ -1,14 +1,6 @@ -From 90b69178f6a866c7f3330c2006f6b5396146192c Mon Sep 17 00:00:00 2001 +From 906ed24dfc7e1bbceacc087ba38aecfd22a9890b Mon Sep 17 00:00:00 2001 From: graysky -Date: Mon, 16 Sep 2024 05:55:58 -0400 -Subject: ZEN: Add graysky's more-uarches -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -From https://github.com/graysky2/kernel_compiler_patch - -more-ISA-levels-and-uarches-for-kernel-6.1.79+.patch +Date: Mon, 16 Sep 2024 14:47:03 -0400 FEATURES This patch adds additional tunings via new x86-64 ISA levels and @@ -121,46 +113,122 @@ REFERENCES 1. https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html#index-x86-Options 2. https://bugzilla.kernel.org/show_bug.cgi?id=77461 3. https://github.com/graysky2/kernel_gcc_patch/issues/15 -4. https://www.linuxforge.net/docs/linux/linux-gcc.php +4. 
http://www.linuxforge.net/docs/linux/linux-gcc.php + --- - arch/x86/Kconfig.cpu | 367 ++++++++++++++++++++++++++++++-- - arch/x86/Makefile | 89 +++++++- - arch/x86/include/asm/vermagic.h | 72 +++++++ - 3 files changed, 511 insertions(+), 17 deletions(-) + arch/x86/Kconfig.cpu | 462 ++++++++++++++++++++++++++++++++++++++++++- + arch/x86/Makefile | 222 +++++++++++++++++++++ + 2 files changed, 675 insertions(+), 9 deletions(-) --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu -@@ -155,9 +155,8 @@ config MPENTIUM4 - -Paxville - -Dempsey +@@ -31,6 +31,7 @@ choice + - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron. + - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D). + - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird). ++ - "Opteron/Athlon64/Hammer/K8" for all K8 and newer AMD CPUs. + - "Crusoe" for the Transmeta Crusoe series. + - "Efficeon" for the Transmeta Efficeon series. + - "Winchip-C6" for original IDT Winchip. +@@ -41,7 +42,10 @@ choice + - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. + - "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above). + - "VIA C7" for VIA C7. ++ - "Intel P4" for the Pentium 4/Netburst microarchitecture. ++ - "Core 2/newer Xeon" for all core2 and newer Intel CPUs. + - "Intel Atom" for the Atom-microarchitecture CPUs. ++ - "Generic-x86-64" for a kernel which runs on any x86-64 CPU. + + See each option's help text for additional details. If you don't know + what to do, choose "Pentium-Pro". +@@ -135,10 +139,21 @@ config MPENTIUM4 + -Mobile Pentium 4 + -Mobile Pentium 4 M + -Extreme Edition (Gallatin) ++ -Prescott ++ -Prescott 2M ++ -Cedar Mill ++ -Presler ++ -Smithfiled + Xeons (Intel Xeon, Xeon MP, Xeon LV, Xeon MV) corename: + -Foster + -Prestonia + -Gallatin ++ -Nocona ++ -Irwindale ++ -Cranford ++ -Potomac ++ -Paxville ++ -Dempsey -- config MK6 -- bool "K6/K6-II/K6-III" -+ bool "AMD K6/K6-II/K6-III" - depends on X86_32 - help - Select this for an AMD K6-family processor. Enables use of -@@ -165,7 +164,7 @@ config MK6 - flags to GCC. + bool "K6/K6-II/K6-III" +@@ -245,6 +260,435 @@ config MATOM - config MK7 -- bool "Athlon/Duron/K7" -+ bool "AMD Athlon/Duron/K7" - depends on X86_32 - help - Select this for an AMD Athlon K7-family processor. Enables use of -@@ -173,12 +172,114 @@ config MK7 - flags to GCC. + endchoice - config MK8 -- bool "Opteron/Athlon64/Hammer/K8" ++config CC_HAS_MARCH_NATIVE ++ # This flag might not be available in cross-compilers: ++ def_bool $(cc-option, -march=native) ++ # LLVM 18 has an easily triggered internal compiler error in core ++ # networking code with '-march=native' on certain systems: ++ # https://github.com/llvm/llvm-project/issues/72026 ++ # LLVM 19 introduces an optimization that resolves some high stack ++ # usage warnings that only appear wth '-march=native'. ++ depends on CC_IS_GCC || CLANG_VERSION >= 190100 ++ ++choice ++ prompt "x86_64 Compiler Build Optimization" ++ default GENERIC_CPU ++ ++config X86_NATIVE_CPU ++ bool "Build and optimize for local/native CPU" ++ depends on X86_64 ++ depends on CC_HAS_MARCH_NATIVE ++ help ++ Optimize for the current CPU used to compile the kernel. ++ Use this option if you intend to build the kernel for your ++ local machine. ++ ++ Note that such a kernel might not work optimally on a ++ different x86 machine. ++ ++ If unsure, say N. ++ ++config GENERIC_CPU ++ bool "Generic-x86-64" ++ depends on X86_64 ++ help ++ Generic x86-64 CPU. ++ Runs equally well on all x86-64 CPUs. 
++ ++config MNATIVE_INTEL ++ bool "Intel-Native optimizations autodetected by the compiler" ++ help ++ ++ Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects ++ the optimum settings to use based on your processor. Do NOT use this ++ for AMD CPUs. Intel Only! ++ ++ Enables -march=native ++ ++config MNATIVE_AMD ++ bool "AMD-Native optimizations autodetected by the compiler" ++ help ++ ++ Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects ++ the optimum settings to use based on your processor. Do NOT use this ++ for Intel CPUs. AMD Only! ++ ++ Enables -march=native ++ ++config MK8 + bool "AMD Opteron/Athlon64/Hammer/K8" - help - Select this for an AMD Opteron or Athlon64 Hammer-family processor. - Enables use of some extended instructions, and passes appropriate - optimization flags to GCC. - ++ help ++ Select this for an AMD Opteron or Athlon64 Hammer-family processor. ++ Enables use of some extended instructions, and passes appropriate ++ optimization flags to GCC. ++ +config MK8SSE3 + bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3" + help @@ -226,21 +294,21 @@ REFERENCES + Enables -march=bdver4 + +config MZEN -+ bool "AMD Zen" ++ bool "AMD Ryzen" + help + Select this for AMD Family 17h Zen processors. + + Enables -march=znver1 + +config MZEN2 -+ bool "AMD Zen 2" ++ bool "AMD Ryzen 2" + help + Select this for AMD Family 17h Zen 2 processors. + + Enables -march=znver2 + +config MZEN3 -+ bool "AMD Zen 3" ++ bool "AMD Ryzen 3" + depends on (CC_IS_GCC && GCC_VERSION >= 100300) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + help + Select this for AMD Family 19h Zen 3 processors. @@ -248,7 +316,7 @@ REFERENCES + Enables -march=znver3 + +config MZEN4 -+ bool "AMD Zen 4" ++ bool "AMD Ryzen 4" + depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 160000) + help + Select this for AMD Family 19h Zen 4 processors. @@ -256,57 +324,48 @@ REFERENCES + Enables -march=znver4 + +config MZEN5 -+ bool "AMD Zen 5" ++ bool "AMD Ryzen 5" + depends on (CC_IS_GCC && GCC_VERSION > 140000) || (CC_IS_CLANG && CLANG_VERSION >= 190100) + help + Select this for AMD Family 19h Zen 5 processors. + + Enables -march=znver5 + - config MCRUSOE - bool "Crusoe" - depends on X86_32 -@@ -269,8 +370,17 @@ config MPSC - using the cpu family field - in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. - -+config MATOM -+ bool "Intel Atom" ++config MPSC ++ bool "Intel P4 / older Netburst based Xeon" ++ depends on X86_64 ++ help ++ Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey ++ Xeon CPUs with Intel 64bit which is compatible with x86-64. ++ Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the ++ Netburst core and shouldn't use this option. You can distinguish them ++ using the cpu family field ++ in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. ++ ++config MCORE2 ++ bool "Intel Core 2" ++ depends on X86_64 + help + -+ Select this for the Intel Atom platform. Intel Atom CPUs have an -+ in-order pipelining architecture and thus can benefit from -+ accordingly optimized code. Use a recent GCC with specific Atom -+ support in order to fully benefit from selecting this option. ++ Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and ++ 53xx) CPUs. You can distinguish newer from older Xeons by the CPU ++ family in /proc/cpuinfo. 
Newer ones have 6 and older ones 15 ++ (not a typo) + - config MCORE2 -- bool "Core 2/newer Xeon" -+ bool "Intel Core 2" - help - - Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and -@@ -278,14 +388,199 @@ config MCORE2 - family in /proc/cpuinfo. Newer ones have 6 and older ones 15 - (not a typo) - --config MATOM -- bool "Intel Atom" + Enables -march=core2 + +config MNEHALEM + bool "Intel Nehalem" - help - -- Select this for the Intel Atom platform. Intel Atom CPUs have an -- in-order pipelining architecture and thus can benefit from -- accordingly optimized code. Use a recent GCC with specific Atom -- support in order to fully benefit from selecting this option. ++ depends on X86_64 ++ help ++ + Select this for 1st Gen Core processors in the Nehalem family. + + Enables -march=nehalem + +config MWESTMERE + bool "Intel Westmere" ++ depends on X86_64 + help + + Select this for the Intel Westmere formerly Nehalem-C family. @@ -315,6 +374,7 @@ REFERENCES + +config MSILVERMONT + bool "Intel Silvermont" ++ depends on X86_64 + help + + Select this for the Intel Silvermont platform. @@ -323,6 +383,7 @@ REFERENCES + +config MGOLDMONT + bool "Intel Goldmont" ++ depends on X86_64 + help + + Select this for the Intel Goldmont platform including Apollo Lake and Denverton. @@ -331,6 +392,7 @@ REFERENCES + +config MGOLDMONTPLUS + bool "Intel Goldmont Plus" ++ depends on X86_64 + help + + Select this for the Intel Goldmont Plus platform including Gemini Lake. @@ -339,6 +401,7 @@ REFERENCES + +config MSANDYBRIDGE + bool "Intel Sandy Bridge" ++ depends on X86_64 + help + + Select this for 2nd Gen Core processors in the Sandy Bridge family. @@ -347,6 +410,7 @@ REFERENCES + +config MIVYBRIDGE + bool "Intel Ivy Bridge" ++ depends on X86_64 + help + + Select this for 3rd Gen Core processors in the Ivy Bridge family. @@ -355,6 +419,7 @@ REFERENCES + +config MHASWELL + bool "Intel Haswell" ++ depends on X86_64 + help + + Select this for 4th Gen Core processors in the Haswell family. @@ -363,6 +428,7 @@ REFERENCES + +config MBROADWELL + bool "Intel Broadwell" ++ depends on X86_64 + help + + Select this for 5th Gen Core processors in the Broadwell family. @@ -371,6 +437,7 @@ REFERENCES + +config MSKYLAKE + bool "Intel Skylake" ++ depends on X86_64 + help + + Select this for 6th Gen Core processors in the Skylake family. @@ -379,6 +446,7 @@ REFERENCES + +config MSKYLAKEX + bool "Intel Skylake X" ++ depends on X86_64 + help + + Select this for 6th Gen Core processors in the Skylake X family. @@ -387,6 +455,7 @@ REFERENCES + +config MCANNONLAKE + bool "Intel Cannon Lake" ++ depends on X86_64 + help + + Select this for 8th Gen Core processors @@ -395,6 +464,7 @@ REFERENCES + +config MICELAKE_CLIENT + bool "Intel Ice Lake" ++ depends on X86_64 + help + + Select this for 10th Gen Core client processors in the Ice Lake family. @@ -403,22 +473,16 @@ REFERENCES + +config MICELAKE_SERVER + bool "Intel Ice Lake Server" ++ depends on X86_64 + help + + Select this for 10th Gen Core server processors in the Ice Lake family. + + Enables -march=icelake-server + -+config MCASCADELAKE -+ bool "Intel Cascade Lake" -+ help -+ -+ Select this for Xeon processors in the Cascade Lake family. 
-+ -+ Enables -march=cascadelake -+ +config MCOOPERLAKE + bool "Intel Cooper Lake" ++ depends on X86_64 + depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000) + help + @@ -426,8 +490,19 @@ REFERENCES + + Enables -march=cooperlake + ++config MCASCADELAKE ++ bool "Intel Cascade Lake" ++ depends on X86_64 ++ depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000) ++ help ++ ++ Select this for Xeon processors in the Cascade Lake family. ++ ++ Enables -march=cascadelake ++ +config MTIGERLAKE + bool "Intel Tiger Lake" ++ depends on X86_64 + depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000) + help + @@ -437,6 +512,7 @@ REFERENCES + +config MSAPPHIRERAPIDS + bool "Intel Sapphire Rapids" ++ depends on X86_64 + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + help + @@ -446,6 +522,7 @@ REFERENCES + +config MROCKETLAKE + bool "Intel Rocket Lake" ++ depends on X86_64 + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + help + @@ -455,6 +532,7 @@ REFERENCES + +config MALDERLAKE + bool "Intel Alder Lake" ++ depends on X86_64 + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + help + @@ -464,6 +542,7 @@ REFERENCES + +config MRAPTORLAKE + bool "Intel Raptor Lake" ++ depends on X86_64 + depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500) + help + @@ -473,6 +552,7 @@ REFERENCES + +config MMETEORLAKE + bool "Intel Meteor Lake" ++ depends on X86_64 + depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500) + help + @@ -482,46 +562,16 @@ REFERENCES + +config MEMERALDRAPIDS + bool "Intel Emerald Rapids" ++ depends on X86_64 + depends on (CC_IS_GCC && GCC_VERSION > 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500) + help + + Select this for fifth-generation 10 nm process processors in the Emerald Rapids family. + + Enables -march=emeraldrapids - - config GENERIC_CPU - bool "Generic-x86-64" -@@ -294,6 +589,26 @@ config GENERIC_CPU - Generic x86-64 CPU. - Run equally well on all x86-64 CPUs. - -+config MNATIVE_INTEL -+ bool "Intel-Native optimizations autodetected by the compiler" -+ help + -+ Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects -+ the optimum settings to use based on your processor. Do NOT use this -+ for AMD CPUs. Intel Only! ++endchoice + -+ Enables -march=native -+ -+config MNATIVE_AMD -+ bool "AMD-Native optimizations autodetected by the compiler" -+ help -+ -+ Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects -+ the optimum settings to use based on your processor. Do NOT use this -+ for Intel CPUs. AMD Only! -+ -+ Enables -march=native -+ - endchoice - - config X86_GENERIC -@@ -308,6 +623,30 @@ config X86_GENERIC - This is really intended for distributors who need more - generic optimizations. - +config X86_64_VERSION + int "x86-64 compiler ISA level" + range 1 3 @@ -531,7 +581,7 @@ REFERENCES + Specify a specific x86-64 compiler ISA level. + + There are three x86-64 ISA levels that work on top of -+ the x86-64 baseline, namely: x86-64-v2, x86-64-v3, and x86-64-v4. ++ the x86-64 baseline, namely: x86-64-v2 and x86-64-v3. 
+ + x86-64-v2 brings support for vector instructions up to Streaming SIMD + Extensions 4.2 (SSE4.2) and Supplemental Streaming SIMD Extensions 3 @@ -546,221 +596,291 @@ REFERENCES + /lib/ld-linux-x86-64.so.2 --help | grep supported + /lib64/ld-linux-x86-64.so.2 --help | grep supported + - # - # Define implied options from the CPU selection here - config X86_INTERNODE_CACHE_SHIFT -@@ -318,7 +657,7 @@ config X86_INTERNODE_CACHE_SHIFT + config X86_GENERIC + bool "Generic x86 support" + depends on X86_32 +@@ -266,8 +710,8 @@ config X86_INTERNODE_CACHE_SHIFT + config X86_L1_CACHE_SHIFT int - default "7" if MPENTIUM4 || MPSC -- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU +- default "7" if MPENTIUM4 +- default "6" if MK7 || MPENTIUMM || MATOM || MVIAC7 || X86_GENERIC || X86_64 ++ default "7" if MPENTIUM4 || MPSC + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE_CLIENT || MICELAKE_SERVER || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD default "4" if MELAN || M486SX || M486 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX -@@ -336,11 +675,11 @@ config X86_ALIGNMENT_16 +@@ -285,19 +729,19 @@ config X86_ALIGNMENT_16 config X86_INTEL_USERCOPY def_bool y -- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 +- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK7 || MEFFICEON + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE_CLIENT || MICELAKE_SERVER || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL config X86_USE_PPRO_CHECKSUM def_bool y -- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM +- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MATOM + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || 
MICELAKE_CLIENT || MICELAKE_SERVER || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD - # - # P6_NOPs are a relatively minor optimization that require a family >= + config X86_TSC + def_bool y +- depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MATOM) || X86_64 ++ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64 + + config X86_HAVE_PAE + def_bool y +- depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC7 || MATOM || X86_64 ++ depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64 + + config X86_CX8 + def_bool y +@@ -307,13 +751,13 @@ config X86_CX8 + # generates cmov. + config X86_CMOV + def_bool y +- depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || MATOM || MGEODE_LX || X86_64) ++ depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) + + config X86_MINIMUM_CPU_FAMILY + int + default "64" if X86_64 +- default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MK7) +- default "5" if X86_32 && X86_CX8 ++ default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8) ++ default "5" if X86_32 && X86_CMPXCHG64 + default "4" + + config X86_DEBUGCTLMSR --- a/arch/x86/Makefile +++ b/arch/x86/Makefile -@@ -182,15 +182,98 @@ else - cflags-$(CONFIG_MK8) += -march=k8 - cflags-$(CONFIG_MPSC) += -march=nocona - cflags-$(CONFIG_MCORE2) += -march=core2 -- cflags-$(CONFIG_MATOM) += -march=atom -- cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic -+ cflags-$(CONFIG_MATOM) += -march=bonnell -+ ifeq ($(CONFIG_X86_64_VERSION),1) -+ cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic -+ rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic -+ else -+ cflags-$(CONFIG_GENERIC_CPU) += -march=x86-64-v$(CONFIG_X86_64_VERSION) -+ rustflags-$(CONFIG_GENERIC_CPU) += -Ctarget-cpu=x86-64-v$(CONFIG_X86_64_VERSION) -+ endif -+ cflags-$(CONFIG_MK8SSE3) += -march=k8-sse3 -+ cflags-$(CONFIG_MK10) += -march=amdfam10 -+ cflags-$(CONFIG_MBARCELONA) += -march=barcelona -+ cflags-$(CONFIG_MBOBCAT) += -march=btver1 -+ cflags-$(CONFIG_MJAGUAR) += -march=btver2 -+ cflags-$(CONFIG_MBULLDOZER) += -march=bdver1 -+ cflags-$(CONFIG_MPILEDRIVER) += -march=bdver2 -mno-tbm -+ cflags-$(CONFIG_MSTEAMROLLER) += -march=bdver3 -mno-tbm -+ cflags-$(CONFIG_MEXCAVATOR) += -march=bdver4 -mno-tbm -+ cflags-$(CONFIG_MZEN) += -march=znver1 -+ cflags-$(CONFIG_MZEN2) += -march=znver2 -+ cflags-$(CONFIG_MZEN3) += -march=znver3 -+ cflags-$(CONFIG_MZEN4) += -march=znver4 -+ cflags-$(CONFIG_MZEN5) += -march=znver5 -+ cflags-$(CONFIG_MNATIVE_INTEL) += -march=native -+ cflags-$(CONFIG_MNATIVE_AMD) += -march=native -mno-tbm -+ cflags-$(CONFIG_MNEHALEM) += -march=nehalem -+ 
cflags-$(CONFIG_MWESTMERE) += -march=westmere -+ cflags-$(CONFIG_MSILVERMONT) += -march=silvermont -+ cflags-$(CONFIG_MGOLDMONT) += -march=goldmont -+ cflags-$(CONFIG_MGOLDMONTPLUS) += -march=goldmont-plus -+ cflags-$(CONFIG_MSANDYBRIDGE) += -march=sandybridge -+ cflags-$(CONFIG_MIVYBRIDGE) += -march=ivybridge -+ cflags-$(CONFIG_MHASWELL) += -march=haswell -+ cflags-$(CONFIG_MBROADWELL) += -march=broadwell -+ cflags-$(CONFIG_MSKYLAKE) += -march=skylake -+ cflags-$(CONFIG_MSKYLAKEX) += -march=skylake-avx512 -+ cflags-$(CONFIG_MCANNONLAKE) += -march=cannonlake -+ cflags-$(CONFIG_MICELAKE_CLIENT) += -march=icelake-client -+ cflags-$(CONFIG_MICELAKE_SERVER) += -march=icelake-server -+ cflags-$(CONFIG_MCASCADELAKE) += -march=cascadelake -+ cflags-$(CONFIG_MCOOPERLAKE) += -march=cooperlake -+ cflags-$(CONFIG_MTIGERLAKE) += -march=tigerlake -+ cflags-$(CONFIG_MSAPPHIRERAPIDS) += -march=sapphirerapids -+ cflags-$(CONFIG_MROCKETLAKE) += -march=rocketlake -+ cflags-$(CONFIG_MALDERLAKE) += -march=alderlake -+ cflags-$(CONFIG_MRAPTORLAKE) += -march=raptorlake -+ cflags-$(CONFIG_MMETEORLAKE) += -march=meteorlake -+ cflags-$(CONFIG_MEMERALDRAPIDS) += -march=emeraldrapids - KBUILD_CFLAGS += $(cflags-y) +@@ -173,8 +173,230 @@ else + # Use -mskip-rax-setup if supported. + KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) - rustflags-$(CONFIG_MK8) += -Ctarget-cpu=k8 - rustflags-$(CONFIG_MPSC) += -Ctarget-cpu=nocona - rustflags-$(CONFIG_MCORE2) += -Ctarget-cpu=core2 - rustflags-$(CONFIG_MATOM) += -Ctarget-cpu=atom -- rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic -+ rustflags-$(CONFIG_MK8SSE3) += -Ctarget-cpu=k8-sse3 -+ rustflags-$(CONFIG_MK10) += -Ctarget-cpu=amdfam10 -+ rustflags-$(CONFIG_MBARCELONA) += -Ctarget-cpu=barcelona -+ rustflags-$(CONFIG_MBOBCAT) += -Ctarget-cpu=btver1 -+ rustflags-$(CONFIG_MJAGUAR) += -Ctarget-cpu=btver2 -+ rustflags-$(CONFIG_MBULLDOZER) += -Ctarget-cpu=bdver1 -+ rustflags-$(CONFIG_MPILEDRIVER) += -Ctarget-cpu=bdver2 -+ rustflags-$(CONFIG_MSTEAMROLLER) += -Ctarget-cpu=bdver3 -+ rustflags-$(CONFIG_MEXCAVATOR) += -Ctarget-cpu=bdver4 -+ rustflags-$(CONFIG_MZEN) += -Ctarget-cpu=znver1 -+ rustflags-$(CONFIG_MZEN2) += -Ctarget-cpu=znver2 -+ rustflags-$(CONFIG_MZEN3) += -Ctarget-cpu=znver3 -+ rustflags-$(CONFIG_MZEN4) += -Ctarget-cpu=znver4 -+ rustflags-$(CONFIG_MZEN5) += -Ctarget-cpu=znver5 -+ rustflags-$(CONFIG_MNATIVE_INTEL) += -Ctarget-cpu=native -+ rustflags-$(CONFIG_MNATIVE_AMD) += -Ctarget-cpu=native -+ rustflags-$(CONFIG_MNEHALEM) += -Ctarget-cpu=nehalem -+ rustflags-$(CONFIG_MWESTMERE) += -Ctarget-cpu=westmere -+ rustflags-$(CONFIG_MSILVERMONT) += -Ctarget-cpu=silvermont -+ rustflags-$(CONFIG_MGOLDMONT) += -Ctarget-cpu=goldmont -+ rustflags-$(CONFIG_MGOLDMONTPLUS) += -Ctarget-cpu=goldmont-plus -+ rustflags-$(CONFIG_MSANDYBRIDGE) += -Ctarget-cpu=sandybridge -+ rustflags-$(CONFIG_MIVYBRIDGE) += -Ctarget-cpu=ivybridge -+ rustflags-$(CONFIG_MHASWELL) += -Ctarget-cpu=haswell -+ rustflags-$(CONFIG_MBROADWELL) += -Ctarget-cpu=broadwell -+ rustflags-$(CONFIG_MSKYLAKE) += -Ctarget-cpu=skylake -+ rustflags-$(CONFIG_MSKYLAKEX) += -Ctarget-cpu=skylake-avx512 -+ rustflags-$(CONFIG_MCANNONLAKE) += -Ctarget-cpu=cannonlake -+ rustflags-$(CONFIG_MICELAKE_CLIENT) += -Ctarget-cpu=icelake-client -+ rustflags-$(CONFIG_MICELAKE_SERVER) += -Ctarget-cpu=icelake-server -+ rustflags-$(CONFIG_MCASCADELAKE) += -Ctarget-cpu=cascadelake -+ rustflags-$(CONFIG_MCOOPERLAKE) += -Ctarget-cpu=cooperlake -+ rustflags-$(CONFIG_MTIGERLAKE) += -Ctarget-cpu=tigerlake -+ rustflags-$(CONFIG_MSAPPHIRERAPIDS) += 
-Ctarget-cpu=sapphirerapids -+ rustflags-$(CONFIG_MROCKETLAKE) += -Ctarget-cpu=rocketlake -+ rustflags-$(CONFIG_MALDERLAKE) += -Ctarget-cpu=alderlake -+ rustflags-$(CONFIG_MRAPTORLAKE) += -Ctarget-cpu=raptorlake -+ rustflags-$(CONFIG_MMETEORLAKE) += -Ctarget-cpu=meteorlake -+ rustflags-$(CONFIG_MEMERALDRAPIDS) += -Ctarget-cpu=emeraldrapids - KBUILD_RUSTFLAGS += $(rustflags-y) ++ifdef CONFIG_X86_NATIVE_CPU ++ KBUILD_CFLAGS += -march=native ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=native ++endif ++ ++ifdef CONFIG_MNATIVE_INTEL ++ KBUILD_CFLAGS += -march=native ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=native ++endif ++ ++ifdef CONFIG_MNATIVE_AMD ++ KBUILD_CFLAGS += -march=native ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=native ++endif ++ ++ifdef CONFIG_MK8 ++ KBUILD_CFLAGS += -march=k8 ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=k8 ++endif ++ ++ifdef CONFIG_MK8SSE3 ++ KBUILD_CFLAGS += -march=k8-sse3 ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=k8-sse3 ++endif ++ ++ifdef CONFIG_MK10 ++ KBUILD_CFLAGS += -march=amdfam10 ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=amdfam10 ++endif ++ ++ifdef CONFIG_MBARCELONA ++ KBUILD_CFLAGS += -march=barcelona ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=barcelona ++endif ++ ++ifdef CONFIG_MBOBCAT ++ KBUILD_CFLAGS += -march=btver1 ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=btver1 ++endif ++ ++ifdef CONFIG_MJAGUAR ++ KBUILD_CFLAGS += -march=btver2 ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=btver2 ++endif ++ ++ifdef CONFIG_MBULLDOZER ++ KBUILD_CFLAGS += -march=bdver1 ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=bdver1 ++endif ++ ++ifdef CONFIG_MPILEDRIVER ++ KBUILD_CFLAGS += -march=bdver2 -mno-tbm ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=bdver2 -mno-tbm ++endif ++ ++ifdef CONFIG_MSTEAMROLLER ++ KBUILD_CFLAGS += -march=bdver3 -mno-tbm ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=bdver3 -mno-tbm ++endif ++ ++ifdef CONFIG_MEXCAVATOR ++ KBUILD_CFLAGS += -march=bdver4 -mno-tbm ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=bdver4 -mno-tbm ++endif ++ ++ifdef CONFIG_MZEN ++ KBUILD_CFLAGS += -march=znver1 ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=znver1 ++endif ++ ++ifdef CONFIG_MZEN2 ++ KBUILD_CFLAGS += -march=znver2 ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=znver2 ++endif ++ ++ifdef CONFIG_MZEN3 ++ KBUILD_CFLAGS += -march=znver3 ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=znver3 ++endif ++ ++ifdef CONFIG_MZEN4 ++ KBUILD_CFLAGS += -march=znver4 ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=znver4 ++endif ++ ++ifdef CONFIG_MZEN5 ++ KBUILD_CFLAGS += -march=znver5 ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=znver5 ++endif ++ ++ifdef CONFIG_MPSC ++ KBUILD_CFLAGS += -march=nocona ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=nocona ++endif ++ ++ifdef CONFIG_MCORE2 ++ KBUILD_CFLAGS += -march=core2 ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=core2 ++endif ++ ++ifdef CONFIG_MNEHALEM ++ KBUILD_CFLAGS += -march=nehalem ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=nehalem ++endif ++ ++ifdef CONFIG_MWESTMERE ++ KBUILD_CFLAGS += -march=westmere ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=westmere ++endif ++ ++ifdef CONFIG_MSILVERMONT ++ KBUILD_CFLAGS += -march=silvermont ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=silvermont ++endif ++ ++ifdef CONFIG_MGOLDMONT ++ KBUILD_CFLAGS += -march=goldmont ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=goldmont ++endif ++ ++ifdef CONFIG_MGOLDMONTPLUS ++ KBUILD_CFLAGS += -march=goldmont-plus ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=goldmont-plus ++endif ++ ++ifdef CONFIG_MSANDYBRIDGE ++ KBUILD_CFLAGS += -march=sandybridge ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=sandybridge ++endif ++ ++ifdef CONFIG_MIVYBRIDGE ++ KBUILD_CFLAGS += -march=ivybridge ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=ivybridge ++endif ++ ++ifdef CONFIG_MHASWELL ++ 
KBUILD_CFLAGS += -march=haswell ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=haswell ++endif ++ ++ifdef CONFIG_MBROADWELL ++ KBUILD_CFLAGS += -march=broadwell ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=broadwell ++endif ++ ++ifdef CONFIG_MSKYLAKE ++ KBUILD_CFLAGS += -march=skylake ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=skylake ++endif ++ ++ifdef CONFIG_MSKYLAKEX ++ KBUILD_CFLAGS += -march=skylake-avx512 ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=skylake-avx512 ++endif ++ ++ifdef CONFIG_MCANNONLAKE ++ KBUILD_CFLAGS += -march=cannonlake ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=cannonlake ++endif ++ ++ifdef CONFIG_MICELAKE_CLIENT ++ KBUILD_CFLAGS += -march=icelake-client ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=icelake-client ++endif ++ ++ifdef CONFIG_MICELAKE_SERVER ++ KBUILD_CFLAGS += -march=icelake-server ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=icelake-server ++endif ++ ++ifdef CONFIG_MCOOPERLAKE ++ KBUILD_CFLAGS += -march=cooperlake ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=cooperlake ++endif ++ ++ifdef CONFIG_MCASCADELAKE ++ KBUILD_CFLAGS += -march=cascadelake ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=cascadelake ++endif ++ ++ifdef CONFIG_MTIGERLAKE ++ KBUILD_CFLAGS += -march=tigerlake ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=tigerlake ++endif ++ ++ifdef CONFIG_MSAPPHIRERAPIDS ++ KBUILD_CFLAGS += -march=sapphirerapids ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=sapphirerapids ++endif ++ ++ifdef CONFIG_MROCKETLAKE ++ KBUILD_CFLAGS += -march=rocketlake ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=rocketlake ++endif ++ ++ifdef CONFIG_MALDERLAKE ++ KBUILD_CFLAGS += -march=alderlake ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=alderlake ++endif ++ ++ifdef CONFIG_MRAPTORLAKE ++ KBUILD_CFLAGS += -march=raptorlake ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=raptorlake ++endif ++ ++ifdef CONFIG_MMETEORLAKE ++ KBUILD_CFLAGS += -march=meteorlake ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=meteorlake ++endif ++ ++ifdef CONFIG_MEMERALDRAPIDS ++ KBUILD_CFLAGS += -march=emeraldrapids ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=emeraldrapids ++endif ++ ++ifdef CONFIG_GENERIC_CPU ++ifeq ($(CONFIG_X86_64_VERSION),1) + KBUILD_CFLAGS += -march=x86-64 -mtune=generic + KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64 -Ztune-cpu=generic ++else ++ KBUILD_CFLAGS +=-march=x86-64-v$(CONFIG_X86_64_VERSION) ++ KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64-v$(CONFIG_X86_64_VERSION) ++endif # CONFIG_X86_64_VERSION ++endif # CONFIG_GENERIC_CPU KBUILD_CFLAGS += -mno-red-zone ---- a/arch/x86/include/asm/vermagic.h -+++ b/arch/x86/include/asm/vermagic.h -@@ -17,6 +17,56 @@ - #define MODULE_PROC_FAMILY "586MMX " - #elif defined CONFIG_MCORE2 - #define MODULE_PROC_FAMILY "CORE2 " -+#elif defined CONFIG_MNATIVE_INTEL -+#define MODULE_PROC_FAMILY "NATIVE_INTEL " -+#elif defined CONFIG_MNATIVE_AMD -+#define MODULE_PROC_FAMILY "NATIVE_AMD " -+#elif defined CONFIG_MNEHALEM -+#define MODULE_PROC_FAMILY "NEHALEM " -+#elif defined CONFIG_MWESTMERE -+#define MODULE_PROC_FAMILY "WESTMERE " -+#elif defined CONFIG_MSILVERMONT -+#define MODULE_PROC_FAMILY "SILVERMONT " -+#elif defined CONFIG_MGOLDMONT -+#define MODULE_PROC_FAMILY "GOLDMONT " -+#elif defined CONFIG_MGOLDMONTPLUS -+#define MODULE_PROC_FAMILY "GOLDMONTPLUS " -+#elif defined CONFIG_MSANDYBRIDGE -+#define MODULE_PROC_FAMILY "SANDYBRIDGE " -+#elif defined CONFIG_MIVYBRIDGE -+#define MODULE_PROC_FAMILY "IVYBRIDGE " -+#elif defined CONFIG_MHASWELL -+#define MODULE_PROC_FAMILY "HASWELL " -+#elif defined CONFIG_MBROADWELL -+#define MODULE_PROC_FAMILY "BROADWELL " -+#elif defined CONFIG_MSKYLAKE -+#define MODULE_PROC_FAMILY "SKYLAKE " -+#elif defined CONFIG_MSKYLAKEX -+#define MODULE_PROC_FAMILY 
"SKYLAKEX " -+#elif defined CONFIG_MCANNONLAKE -+#define MODULE_PROC_FAMILY "CANNONLAKE " -+#elif defined CONFIG_MICELAKE_CLIENT -+#define MODULE_PROC_FAMILY "ICELAKE_CLIENT " -+#elif defined CONFIG_MICELAKE_SERVER -+#define MODULE_PROC_FAMILY "ICELAKE_SERVER " -+#elif defined CONFIG_MCASCADELAKE -+#define MODULE_PROC_FAMILY "CASCADELAKE " -+#elif defined CONFIG_MCOOPERLAKE -+#define MODULE_PROC_FAMILY "COOPERLAKE " -+#elif defined CONFIG_MTIGERLAKE -+#define MODULE_PROC_FAMILY "TIGERLAKE " -+#elif defined CONFIG_MSAPPHIRERAPIDS -+#define MODULE_PROC_FAMILY "SAPPHIRERAPIDS " -+#elif defined CONFIG_ROCKETLAKE -+#define MODULE_PROC_FAMILY "ROCKETLAKE " -+#elif defined CONFIG_MALDERLAKE -+#define MODULE_PROC_FAMILY "ALDERLAKE " -+#elif defined CONFIG_MRAPTORLAKE -+#define MODULE_PROC_FAMILY "RAPTORLAKE " -+#elif defined CONFIG_MMETEORLAKE -+#define MODULE_PROC_FAMILY "METEORLAKE " -+#elif defined CONFIG_MEMERALDRAPIDS -+#define MODULE_PROC_FAMILY "EMERALDRAPIDS " - #elif defined CONFIG_MATOM - #define MODULE_PROC_FAMILY "ATOM " - #elif defined CONFIG_M686 -@@ -35,6 +85,28 @@ - #define MODULE_PROC_FAMILY "K7 " - #elif defined CONFIG_MK8 - #define MODULE_PROC_FAMILY "K8 " -+#elif defined CONFIG_MK8SSE3 -+#define MODULE_PROC_FAMILY "K8SSE3 " -+#elif defined CONFIG_MK10 -+#define MODULE_PROC_FAMILY "K10 " -+#elif defined CONFIG_MBARCELONA -+#define MODULE_PROC_FAMILY "BARCELONA " -+#elif defined CONFIG_MBOBCAT -+#define MODULE_PROC_FAMILY "BOBCAT " -+#elif defined CONFIG_MBULLDOZER -+#define MODULE_PROC_FAMILY "BULLDOZER " -+#elif defined CONFIG_MPILEDRIVER -+#define MODULE_PROC_FAMILY "PILEDRIVER " -+#elif defined CONFIG_MSTEAMROLLER -+#define MODULE_PROC_FAMILY "STEAMROLLER " -+#elif defined CONFIG_MJAGUAR -+#define MODULE_PROC_FAMILY "JAGUAR " -+#elif defined CONFIG_MEXCAVATOR -+#define MODULE_PROC_FAMILY "EXCAVATOR " -+#elif defined CONFIG_MZEN -+#define MODULE_PROC_FAMILY "ZEN " -+#elif defined CONFIG_MZEN2 -+#define MODULE_PROC_FAMILY "ZEN2 " - #elif defined CONFIG_MELAN - #define MODULE_PROC_FAMILY "ELAN " - #elif defined CONFIG_MCRUSOE + KBUILD_CFLAGS += -mcmodel=kernel diff --git a/debian/patches/mixed-arch/0002-ZEN-Restore-CONFIG_OPTIMIZE_FOR_PERFORMANCE_O3.patch b/debian/patches/mixed-arch/0002-ZEN-Restore-CONFIG_OPTIMIZE_FOR_PERFORMANCE_O3.patch index 69beee0..642d3cc 100644 --- a/debian/patches/mixed-arch/0002-ZEN-Restore-CONFIG_OPTIMIZE_FOR_PERFORMANCE_O3.patch +++ b/debian/patches/mixed-arch/0002-ZEN-Restore-CONFIG_OPTIMIZE_FOR_PERFORMANCE_O3.patch @@ -1,4 +1,4 @@ -From f4f448a305e9d705b9a0da102ddfd58bfaac5cc0 Mon Sep 17 00:00:00 2001 +From 15db9c3419fd147812151d95fb34bbd70f2f9715 Mon Sep 17 00:00:00 2001 From: "Jan Alexander Steffens (heftig)" Date: Sun, 11 Dec 2022 23:51:16 +0100 Subject: ZEN: Restore CONFIG_OPTIMIZE_FOR_PERFORMANCE_O3 @@ -13,7 +13,7 @@ dependency on CONFIG_ARC and adds RUSTFLAGS. --- a/Makefile +++ b/Makefile -@@ -871,6 +871,9 @@ KBUILD_CFLAGS += -fno-delete-null-pointe +@@ -868,6 +868,9 @@ KBUILD_CFLAGS += -fno-delete-null-pointe ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE KBUILD_CFLAGS += -O2 KBUILD_RUSTFLAGS += -Copt-level=2 @@ -25,7 +25,7 @@ dependency on CONFIG_ARC and adds RUSTFLAGS. KBUILD_RUSTFLAGS += -Copt-level=s --- a/init/Kconfig +++ b/init/Kconfig -@@ -1473,6 +1473,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE +@@ -1479,6 +1479,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE with the "-O2" compiler flag for best performance and most helpful compile-time warnings. 
diff --git a/debian/patches/mixed-arch/0003-krd-adjust-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3.patch b/debian/patches/mixed-arch/0003-krd-adjust-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3.patch index 3e8d9e6..ce9fcb5 100644 --- a/debian/patches/mixed-arch/0003-krd-adjust-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3.patch +++ b/debian/patches/mixed-arch/0003-krd-adjust-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3.patch @@ -1,6 +1,6 @@ --- a/Makefile +++ b/Makefile -@@ -879,6 +879,10 @@ KBUILD_CFLAGS += -Os +@@ -876,6 +876,10 @@ KBUILD_CFLAGS += -Os KBUILD_RUSTFLAGS += -Copt-level=s endif diff --git a/debian/patches/mixed-arch/0004-XANMOD-x86-build-Prevent-generating-avx2-and-avx512-.patch b/debian/patches/mixed-arch/0004-XANMOD-x86-build-Prevent-generating-avx2-and-avx512-.patch index 0257629..68063bd 100644 --- a/debian/patches/mixed-arch/0004-XANMOD-x86-build-Prevent-generating-avx2-and-avx512-.patch +++ b/debian/patches/mixed-arch/0004-XANMOD-x86-build-Prevent-generating-avx2-and-avx512-.patch @@ -1,24 +1,22 @@ -From 3ebc1fdf3e0ee9bff1efe20eb5791eba5c84a810 Mon Sep 17 00:00:00 2001 +From 40f9fa82bb21a5e3f17f539897128a69824ad8ef Mon Sep 17 00:00:00 2001 From: Alexandre Frade -Date: Thu, 3 Aug 2023 13:53:49 +0000 -Subject: XANMOD: x86/build: Prevent generating avx2 and avx512 floating-point code +Date: Mon, 18 Nov 2024 20:17:44 +0000 +Subject: [PATCH 1/4] XANMOD: x86/build: Prevent generating avx2 floating-point + code Signed-off-by: Alexandre Frade --- - arch/x86/Makefile | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) + arch/x86/Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) --- a/arch/x86/Makefile +++ b/arch/x86/Makefile -@@ -74,9 +74,9 @@ export BITS +@@ -74,7 +74,7 @@ export BITS # # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 # -KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -+KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -mno-avx512f ++KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json --KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 -+KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-avx512f + KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 - # - # CFLAGS for compiling floating point code inside the kernel. 
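The XANMOD change above widens the kernel-wide SIMD restriction from AVX to AVX2 (the Rust target-feature list already masks avx2); ordinary kernel C code must not touch vector registers because their state is not saved on kernel entry. A quick way to confirm the flag actually reaches the compiler and that generated code stays scalar, assuming an x86-64 build tree (illustrative commands only, not part of the patch):

  # the flag should appear on every C compile line
  make V=1 arch/x86/kernel/setup.o 2>&1 | grep -c -- '-mno-avx2'
  # compiler-generated code should reference (almost) no ymm registers;
  # remaining hits come from hand-written asm such as the crypto code
  objdump -d vmlinux | grep -c '%ymm' || true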
diff --git a/debian/patches/mixed-arch/0005-krd-adjust-KBUILD_CFLAGS-fno-tree-vectorize.patch b/debian/patches/mixed-arch/0005-krd-adjust-KBUILD_CFLAGS-fno-tree-vectorize.patch index 1ea5c2a..fff3a56 100644 --- a/debian/patches/mixed-arch/0005-krd-adjust-KBUILD_CFLAGS-fno-tree-vectorize.patch +++ b/debian/patches/mixed-arch/0005-krd-adjust-KBUILD_CFLAGS-fno-tree-vectorize.patch @@ -4,8 +4,8 @@ # # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 # --KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -mno-avx512f -+KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -mno-avx512f -fno-tree-vectorize +-KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 ++KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -fno-tree-vectorize KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json - KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-avx512f + KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 diff --git a/debian/patches/mixed-arch/0006-XANMOD-kbuild-Add-GCC-SMS-based-modulo-scheduling-fl.patch b/debian/patches/mixed-arch/0006-XANMOD-kbuild-Add-GCC-SMS-based-modulo-scheduling-fl.patch index 0e324a0..dd5edd6 100644 --- a/debian/patches/mixed-arch/0006-XANMOD-kbuild-Add-GCC-SMS-based-modulo-scheduling-fl.patch +++ b/debian/patches/mixed-arch/0006-XANMOD-kbuild-Add-GCC-SMS-based-modulo-scheduling-fl.patch @@ -1,7 +1,8 @@ -From b1a99a2a9675f80b7c04a239a6b047373ccf3a17 Mon Sep 17 00:00:00 2001 +From 7e45fca50a3151248266bca7058e1efa9b5233ca Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Mon, 16 Sep 2024 00:55:35 +0000 -Subject: XANMOD: kbuild: Add GCC SMS-based modulo scheduling flags +Subject: [PATCH 02/19] XANMOD: kbuild: Add GCC SMS-based modulo scheduling + flags Signed-off-by: Alexandre Frade --- @@ -10,7 +11,7 @@ Signed-off-by: Alexandre Frade --- a/Makefile +++ b/Makefile -@@ -883,6 +883,13 @@ ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE +@@ -880,6 +880,13 @@ ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE KBUILD_CFLAGS += $(call cc-option,-fivopts) endif diff --git a/debian/patches/patchset-pf/amd-pstate/0001-cpufreq-amd-pstate-Remove-the-redundant-des_perf-cla.patch b/debian/patches/patchset-pf/amd-pstate/0001-cpufreq-amd-pstate-Remove-the-redundant-des_perf-cla.patch deleted file mode 100644 index a4776ff..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0001-cpufreq-amd-pstate-Remove-the-redundant-des_perf-cla.patch +++ /dev/null @@ -1,27 +0,0 @@ -From cb40e98d75a75567cbd10f9fc69c2ec12c87a445 Mon Sep 17 00:00:00 2001 -From: Dhananjay Ugwekar -Date: Wed, 5 Feb 2025 11:25:15 +0000 -Subject: cpufreq/amd-pstate: Remove the redundant des_perf clamping in - adjust_perf - -des_perf is later on clamped between min_perf and max_perf in -amd_pstate_update. So, remove the redundant clamping from -amd_pstate_adjust_perf. 
- -Signed-off-by: Dhananjay Ugwekar -Reviewed-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate.c | 2 -- - 1 file changed, 2 deletions(-) - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -705,8 +705,6 @@ static void amd_pstate_adjust_perf(unsig - if (max_perf < min_perf) - max_perf = min_perf; - -- des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); -- - amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true, - policy->governor->flags); - cpufreq_cpu_put(policy); diff --git a/debian/patches/patchset-pf/amd-pstate/0002-cpufreq-amd-pstate-Modularize-perf-freq-conversion.patch b/debian/patches/patchset-pf/amd-pstate/0002-cpufreq-amd-pstate-Modularize-perf-freq-conversion.patch deleted file mode 100644 index acc95f1..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0002-cpufreq-amd-pstate-Modularize-perf-freq-conversion.patch +++ /dev/null @@ -1,133 +0,0 @@ -From f58e440e56a6c8a2c04894e5d169d1a98a8ce74f Mon Sep 17 00:00:00 2001 -From: Dhananjay Ugwekar -Date: Wed, 5 Feb 2025 11:25:18 +0000 -Subject: cpufreq/amd-pstate: Modularize perf<->freq conversion - -Delegate the perf<->frequency conversion to helper functions to reduce -code duplication, and improve readability. - -Signed-off-by: Dhananjay Ugwekar -Reviewed-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate.c | 57 +++++++++++++++++++----------------- - 1 file changed, 30 insertions(+), 27 deletions(-) - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -142,6 +142,20 @@ static struct quirk_entry quirk_amd_7k62 - .lowest_freq = 550, - }; - -+static inline u8 freq_to_perf(struct amd_cpudata *cpudata, unsigned int freq_val) -+{ -+ u8 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf, -+ cpudata->nominal_freq); -+ -+ return clamp_t(u8, perf_val, cpudata->lowest_perf, cpudata->highest_perf); -+} -+ -+static inline u32 perf_to_freq(struct amd_cpudata *cpudata, u8 perf_val) -+{ -+ return DIV_ROUND_UP_ULL((u64)cpudata->nominal_freq * perf_val, -+ cpudata->nominal_perf); -+} -+ - static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi) - { - /** -@@ -534,7 +548,6 @@ static inline bool amd_pstate_sample(str - static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf, - u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags) - { -- unsigned long max_freq; - struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu); - u8 nominal_perf = READ_ONCE(cpudata->nominal_perf); - -@@ -543,8 +556,7 @@ static void amd_pstate_update(struct amd - - des_perf = clamp_t(u8, des_perf, min_perf, max_perf); - -- max_freq = READ_ONCE(cpudata->max_limit_freq); -- policy->cur = div_u64(des_perf * max_freq, max_perf); -+ policy->cur = perf_to_freq(cpudata, des_perf); - - if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { - min_perf = des_perf; -@@ -594,14 +606,11 @@ static int amd_pstate_verify(struct cpuf - - static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) - { -- u8 max_limit_perf, min_limit_perf, max_perf; -- u32 max_freq; -+ u8 max_limit_perf, min_limit_perf; - struct amd_cpudata *cpudata = policy->driver_data; - -- max_perf = READ_ONCE(cpudata->highest_perf); -- max_freq = READ_ONCE(cpudata->max_freq); -- max_limit_perf = div_u64(policy->max * max_perf, max_freq); -- min_limit_perf = div_u64(policy->min * max_perf, max_freq); -+ max_limit_perf = freq_to_perf(cpudata, policy->max); -+ min_limit_perf = freq_to_perf(cpudata, policy->min); - - if (cpudata->policy == 
CPUFREQ_POLICY_PERFORMANCE) - min_limit_perf = min(cpudata->nominal_perf, max_limit_perf); -@@ -619,21 +628,15 @@ static int amd_pstate_update_freq(struct - { - struct cpufreq_freqs freqs; - struct amd_cpudata *cpudata = policy->driver_data; -- u8 des_perf, cap_perf; -- -- if (!cpudata->max_freq) -- return -ENODEV; -+ u8 des_perf; - - if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) - amd_pstate_update_min_max_limit(policy); - -- cap_perf = READ_ONCE(cpudata->highest_perf); -- - freqs.old = policy->cur; - freqs.new = target_freq; - -- des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf, -- cpudata->max_freq); -+ des_perf = freq_to_perf(cpudata, target_freq); - - WARN_ON(fast_switch && !policy->fast_switch_enabled); - /* -@@ -907,7 +910,6 @@ static int amd_pstate_init_freq(struct a - { - int ret; - u32 min_freq, max_freq; -- u8 highest_perf, nominal_perf, lowest_nonlinear_perf; - u32 nominal_freq, lowest_nonlinear_freq; - struct cppc_perf_caps cppc_perf; - -@@ -925,16 +927,17 @@ static int amd_pstate_init_freq(struct a - else - nominal_freq = cppc_perf.nominal_freq; - -- highest_perf = READ_ONCE(cpudata->highest_perf); -- nominal_perf = READ_ONCE(cpudata->nominal_perf); -- max_freq = div_u64((u64)highest_perf * nominal_freq, nominal_perf); -- -- lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); -- lowest_nonlinear_freq = div_u64((u64)nominal_freq * lowest_nonlinear_perf, nominal_perf); -- WRITE_ONCE(cpudata->min_freq, min_freq * 1000); -- WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq * 1000); -- WRITE_ONCE(cpudata->nominal_freq, nominal_freq * 1000); -- WRITE_ONCE(cpudata->max_freq, max_freq * 1000); -+ min_freq *= 1000; -+ nominal_freq *= 1000; -+ -+ WRITE_ONCE(cpudata->nominal_freq, nominal_freq); -+ WRITE_ONCE(cpudata->min_freq, min_freq); -+ -+ max_freq = perf_to_freq(cpudata, cpudata->highest_perf); -+ lowest_nonlinear_freq = perf_to_freq(cpudata, cpudata->lowest_nonlinear_perf); -+ -+ WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq); -+ WRITE_ONCE(cpudata->max_freq, max_freq); - - /** - * Below values need to be initialized correctly, otherwise driver will fail to load diff --git a/debian/patches/patchset-pf/amd-pstate/0003-cpufreq-amd-pstate-Remove-the-unnecessary-cpufreq_up.patch b/debian/patches/patchset-pf/amd-pstate/0003-cpufreq-amd-pstate-Remove-the-unnecessary-cpufreq_up.patch deleted file mode 100644 index 9cfe9bb..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0003-cpufreq-amd-pstate-Remove-the-unnecessary-cpufreq_up.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 0a12d4a3ca1a996c1073d60c6775424972e8b7b9 Mon Sep 17 00:00:00 2001 -From: Dhananjay Ugwekar -Date: Wed, 5 Feb 2025 11:25:19 +0000 -Subject: cpufreq/amd-pstate: Remove the unnecessary cpufreq_update_policy call - -The update_limits callback is only called in two conditions. - -* When the preferred core rankings change. In which case, we just need to -change the prefcore ranking in the cpudata struct. As there are no changes -to any of the perf values, there is no need to call cpufreq_update_policy() - -* When the _PPC ACPI object changes, i.e. the highest allowed Pstate -changes. The _PPC object is only used for a table based cpufreq driver -like acpi-cpufreq, hence is irrelevant for CPPC based amd-pstate. - -Hence, the cpufreq_update_policy() call becomes unnecessary and can be -removed. 
- -Signed-off-by: Dhananjay Ugwekar -Reviewed-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate.c | 4 ---- - 1 file changed, 4 deletions(-) - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -855,10 +855,6 @@ static void amd_pstate_update_limits(uns - sched_set_itmt_core_prio((int)cur_high, cpu); - } - cpufreq_cpu_put(policy); -- -- if (!highest_perf_changed) -- cpufreq_update_policy(cpu); -- - } - - /* diff --git a/debian/patches/patchset-pf/amd-pstate/0004-cpufreq-amd-pstate-Use-scope-based-cleanup-for-cpufr.patch b/debian/patches/patchset-pf/amd-pstate/0004-cpufreq-amd-pstate-Use-scope-based-cleanup-for-cpufr.patch deleted file mode 100644 index 1600c44..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0004-cpufreq-amd-pstate-Use-scope-based-cleanup-for-cpufr.patch +++ /dev/null @@ -1,124 +0,0 @@ -From ab0520499c83ff44d468f1b2b604c85e2f78d694 Mon Sep 17 00:00:00 2001 -From: Dhananjay Ugwekar -Date: Wed, 5 Feb 2025 11:25:22 +0000 -Subject: cpufreq/amd-pstate: Use scope based cleanup for cpufreq_policy refs - -There have been instances in past where refcount decrementing is missed -while exiting a function. Use automatic scope based cleanup to avoid -such errors. - -Signed-off-by: Dhananjay Ugwekar -Reviewed-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate.c | 25 ++++++++----------------- - include/linux/cpufreq.h | 3 +++ - 2 files changed, 11 insertions(+), 17 deletions(-) - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -548,7 +548,7 @@ static inline bool amd_pstate_sample(str - static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf, - u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags) - { -- struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu); -+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu); - u8 nominal_perf = READ_ONCE(cpudata->nominal_perf); - - if (!policy) -@@ -574,8 +574,6 @@ static void amd_pstate_update(struct amd - } - - amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch); -- -- cpufreq_cpu_put(policy); - } - - static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) -@@ -587,7 +585,8 @@ static int amd_pstate_verify(struct cpuf - * amd-pstate qos_requests. 
- */ - if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) { -- struct cpufreq_policy *policy = cpufreq_cpu_get(policy_data->cpu); -+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = -+ cpufreq_cpu_get(policy_data->cpu); - struct amd_cpudata *cpudata; - - if (!policy) -@@ -595,7 +594,6 @@ static int amd_pstate_verify(struct cpuf - - cpudata = policy->driver_data; - policy_data->min = cpudata->lowest_nonlinear_freq; -- cpufreq_cpu_put(policy); - } - - cpufreq_verify_within_cpu_limits(policy_data); -@@ -678,7 +676,7 @@ static void amd_pstate_adjust_perf(unsig - unsigned long capacity) - { - u8 max_perf, min_perf, des_perf, cap_perf, min_limit_perf; -- struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); -+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); - struct amd_cpudata *cpudata; - - if (!policy) -@@ -710,7 +708,6 @@ static void amd_pstate_adjust_perf(unsig - - amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true, - policy->governor->flags); -- cpufreq_cpu_put(policy); - } - - static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) -@@ -823,28 +820,23 @@ static void amd_pstate_init_prefcore(str - - static void amd_pstate_update_limits(unsigned int cpu) - { -- struct cpufreq_policy *policy = NULL; -+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); - struct amd_cpudata *cpudata; - u32 prev_high = 0, cur_high = 0; -- int ret; - bool highest_perf_changed = false; - - if (!amd_pstate_prefcore) - return; - -- policy = cpufreq_cpu_get(cpu); - if (!policy) - return; - -- cpudata = policy->driver_data; -- - guard(mutex)(&amd_pstate_driver_lock); - -- ret = amd_get_highest_perf(cpu, &cur_high); -- if (ret) { -- cpufreq_cpu_put(policy); -+ if (amd_get_highest_perf(cpu, &cur_high)) - return; -- } -+ -+ cpudata = policy->driver_data; - - prev_high = READ_ONCE(cpudata->prefcore_ranking); - highest_perf_changed = (prev_high != cur_high); -@@ -854,7 +846,6 @@ static void amd_pstate_update_limits(uns - if (cur_high < CPPC_MAX_PERF) - sched_set_itmt_core_prio((int)cur_high, cpu); - } -- cpufreq_cpu_put(policy); - } - - /* ---- a/include/linux/cpufreq.h -+++ b/include/linux/cpufreq.h -@@ -213,6 +213,9 @@ static inline struct cpufreq_policy *cpu - static inline void cpufreq_cpu_put(struct cpufreq_policy *policy) { } - #endif - -+/* Scope based cleanup macro for cpufreq_policy kobject reference counting */ -+DEFINE_FREE(put_cpufreq_policy, struct cpufreq_policy *, if (_T) cpufreq_cpu_put(_T)) -+ - static inline bool policy_is_inactive(struct cpufreq_policy *policy) - { - return cpumask_empty(policy->cpus); diff --git a/debian/patches/patchset-pf/amd-pstate/0005-cpufreq-amd-pstate-Remove-the-unncecessary-driver_lo.patch b/debian/patches/patchset-pf/amd-pstate/0005-cpufreq-amd-pstate-Remove-the-unncecessary-driver_lo.patch deleted file mode 100644 index d3b9a9f..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0005-cpufreq-amd-pstate-Remove-the-unncecessary-driver_lo.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 658a4b7a41583e3b73477c0fbbee07aa6d6f7e0e Mon Sep 17 00:00:00 2001 -From: Dhananjay Ugwekar -Date: Wed, 5 Feb 2025 11:25:23 +0000 -Subject: cpufreq/amd-pstate: Remove the unncecessary driver_lock in - amd_pstate_update_limits - -There is no need to take a driver wide lock while updating the -highest_perf value in the percpu cpudata struct. Hence remove it. 
- -Signed-off-by: Dhananjay Ugwekar -Reviewed-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate.c | 2 -- - 1 file changed, 2 deletions(-) - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -831,8 +831,6 @@ static void amd_pstate_update_limits(uns - if (!policy) - return; - -- guard(mutex)(&amd_pstate_driver_lock); -- - if (amd_get_highest_perf(cpu, &cur_high)) - return; - diff --git a/debian/patches/patchset-pf/amd-pstate/0006-cpufreq-amd-pstate-Fix-the-clamping-of-perf-values.patch b/debian/patches/patchset-pf/amd-pstate/0006-cpufreq-amd-pstate-Fix-the-clamping-of-perf-values.patch deleted file mode 100644 index 7bbe65f..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0006-cpufreq-amd-pstate-Fix-the-clamping-of-perf-values.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 20f8507de83bc844c6ff2329e61ffc37734364e9 Mon Sep 17 00:00:00 2001 -From: Dhananjay Ugwekar -Date: Sat, 22 Feb 2025 03:32:22 +0000 -Subject: cpufreq/amd-pstate: Fix the clamping of perf values - -The clamping in freq_to_perf() is broken right now, as we first typecast -(read wraparound) the overflowing value into a u8 and then clamp it down. -So, use a u32 to store the >255 value in certain edge cases and then clamp -it down into a u8. - -Also, use a "explicit typecast + clamp" instead of just a "clamp_t" as the -latter typecasts first and then clamps between the limits, which defeats -our purpose. - -Fixes: 305621eb6a8b ("cpufreq/amd-pstate: Modularize perf<->freq conversion") -Signed-off-by: Dhananjay Ugwekar ---- - drivers/cpufreq/amd-pstate.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -144,10 +144,10 @@ static struct quirk_entry quirk_amd_7k62 - - static inline u8 freq_to_perf(struct amd_cpudata *cpudata, unsigned int freq_val) - { -- u8 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf, -+ u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf, - cpudata->nominal_freq); - -- return clamp_t(u8, perf_val, cpudata->lowest_perf, cpudata->highest_perf); -+ return (u8)clamp(perf_val, cpudata->lowest_perf, cpudata->highest_perf); - } - - static inline u32 perf_to_freq(struct amd_cpudata *cpudata, u8 perf_val) diff --git a/debian/patches/patchset-pf/amd-pstate/0007-cpufreq-amd-pstate-Show-a-warning-when-a-CPU-fails-t.patch b/debian/patches/patchset-pf/amd-pstate/0007-cpufreq-amd-pstate-Show-a-warning-when-a-CPU-fails-t.patch deleted file mode 100644 index 0b7b0ec..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0007-cpufreq-amd-pstate-Show-a-warning-when-a-CPU-fails-t.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 240a074b7f92278755df715be1ea5ea5d3d2f5ac Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Wed, 26 Feb 2025 01:49:17 -0600 -Subject: cpufreq/amd-pstate: Show a warning when a CPU fails to setup - -I came across a system that MSR_AMD_CPPC_CAP1 for some CPUs isn't -populated. This is an unexpected behavior that is most likely a -BIOS bug. In the event it happens I'd like users to report bugs -to properly root cause and get this fixed. - -Reviewed-by: Gautham R. 
Shenoy -Reviewed-by: Dhananjay Ugwekar -Signed-off-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate.c | 2 ++ - 1 file changed, 2 insertions(+) - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -1027,6 +1027,7 @@ static int amd_pstate_cpu_init(struct cp - free_cpudata2: - freq_qos_remove_request(&cpudata->req[0]); - free_cpudata1: -+ pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret); - kfree(cpudata); - return ret; - } -@@ -1520,6 +1521,7 @@ static int amd_pstate_epp_cpu_init(struc - return 0; - - free_cpudata1: -+ pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret); - kfree(cpudata); - return ret; - } diff --git a/debian/patches/patchset-pf/amd-pstate/0008-cpufreq-amd-pstate-Drop-min-and-max-cached-frequenci.patch b/debian/patches/patchset-pf/amd-pstate/0008-cpufreq-amd-pstate-Drop-min-and-max-cached-frequenci.patch deleted file mode 100644 index bb36fe7..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0008-cpufreq-amd-pstate-Drop-min-and-max-cached-frequenci.patch +++ /dev/null @@ -1,209 +0,0 @@ -From 82520910e91d62f19c944ff17ba8f966553e79d6 Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Wed, 26 Feb 2025 01:49:18 -0600 -Subject: cpufreq/amd-pstate: Drop min and max cached frequencies - -Use the perf_to_freq helpers to calculate this on the fly. -As the members are no longer cached add an extra check into -amd_pstate_epp_update_limit() to avoid unnecessary calls in -amd_pstate_update_min_max_limit(). - -Reviewed-by: Gautham R. Shenoy -Reviewed-by: Dhananjay Ugwekar -Signed-off-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate-ut.c | 14 +++++------ - drivers/cpufreq/amd-pstate.c | 43 +++++++++------------------------ - drivers/cpufreq/amd-pstate.h | 9 ++----- - 3 files changed, 20 insertions(+), 46 deletions(-) - ---- a/drivers/cpufreq/amd-pstate-ut.c -+++ b/drivers/cpufreq/amd-pstate-ut.c -@@ -214,14 +214,14 @@ static void amd_pstate_ut_check_freq(u32 - break; - cpudata = policy->driver_data; - -- if (!((cpudata->max_freq >= cpudata->nominal_freq) && -+ if (!((policy->cpuinfo.max_freq >= cpudata->nominal_freq) && - (cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) && -- (cpudata->lowest_nonlinear_freq > cpudata->min_freq) && -- (cpudata->min_freq > 0))) { -+ (cpudata->lowest_nonlinear_freq > policy->cpuinfo.min_freq) && -+ (policy->cpuinfo.min_freq > 0))) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n", -- __func__, cpu, cpudata->max_freq, cpudata->nominal_freq, -- cpudata->lowest_nonlinear_freq, cpudata->min_freq); -+ __func__, cpu, policy->cpuinfo.max_freq, cpudata->nominal_freq, -+ cpudata->lowest_nonlinear_freq, policy->cpuinfo.min_freq); - goto skip_test; - } - -@@ -233,13 +233,13 @@ static void amd_pstate_ut_check_freq(u32 - } - - if (cpudata->boost_supported) { -- if ((policy->max == cpudata->max_freq) || -+ if ((policy->max == policy->cpuinfo.max_freq) || - (policy->max == cpudata->nominal_freq)) - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; - else { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n", -- __func__, cpu, policy->max, cpudata->max_freq, -+ __func__, cpu, policy->max, policy->cpuinfo.max_freq, - cpudata->nominal_freq); - goto skip_test; - } ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -717,7 +717,7 @@ static int 
amd_pstate_cpu_boost_update(s - int ret = 0; - - nominal_freq = READ_ONCE(cpudata->nominal_freq); -- max_freq = READ_ONCE(cpudata->max_freq); -+ max_freq = perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf)); - - if (on) - policy->cpuinfo.max_freq = max_freq; -@@ -916,13 +916,10 @@ static int amd_pstate_init_freq(struct a - nominal_freq *= 1000; - - WRITE_ONCE(cpudata->nominal_freq, nominal_freq); -- WRITE_ONCE(cpudata->min_freq, min_freq); - - max_freq = perf_to_freq(cpudata, cpudata->highest_perf); - lowest_nonlinear_freq = perf_to_freq(cpudata, cpudata->lowest_nonlinear_perf); -- - WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq); -- WRITE_ONCE(cpudata->max_freq, max_freq); - - /** - * Below values need to be initialized correctly, otherwise driver will fail to load -@@ -947,9 +944,9 @@ static int amd_pstate_init_freq(struct a - - static int amd_pstate_cpu_init(struct cpufreq_policy *policy) - { -- int min_freq, max_freq, ret; -- struct device *dev; - struct amd_cpudata *cpudata; -+ struct device *dev; -+ int ret; - - /* - * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, -@@ -980,17 +977,11 @@ static int amd_pstate_cpu_init(struct cp - if (ret) - goto free_cpudata1; - -- min_freq = READ_ONCE(cpudata->min_freq); -- max_freq = READ_ONCE(cpudata->max_freq); -- - policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu); - policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu); - -- policy->min = min_freq; -- policy->max = max_freq; -- -- policy->cpuinfo.min_freq = min_freq; -- policy->cpuinfo.max_freq = max_freq; -+ policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf); -+ policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf); - - policy->boost_enabled = READ_ONCE(cpudata->boost_supported); - -@@ -1014,9 +1005,6 @@ static int amd_pstate_cpu_init(struct cp - goto free_cpudata2; - } - -- cpudata->max_limit_freq = max_freq; -- cpudata->min_limit_freq = min_freq; -- - policy->driver_data = cpudata; - - if (!current_pstate_driver->adjust_perf) -@@ -1074,14 +1062,10 @@ static int amd_pstate_cpu_suspend(struct - static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, - char *buf) - { -- int max_freq; - struct amd_cpudata *cpudata = policy->driver_data; - -- max_freq = READ_ONCE(cpudata->max_freq); -- if (max_freq < 0) -- return max_freq; - -- return sysfs_emit(buf, "%u\n", max_freq); -+ return sysfs_emit(buf, "%u\n", perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf))); - } - - static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy, -@@ -1439,10 +1423,10 @@ static bool amd_pstate_acpi_pm_profile_u - - static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) - { -- int min_freq, max_freq, ret; - struct amd_cpudata *cpudata; - struct device *dev; - u64 value; -+ int ret; - - /* - * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, -@@ -1473,19 +1457,13 @@ static int amd_pstate_epp_cpu_init(struc - if (ret) - goto free_cpudata1; - -- min_freq = READ_ONCE(cpudata->min_freq); -- max_freq = READ_ONCE(cpudata->max_freq); -- -- policy->cpuinfo.min_freq = min_freq; -- policy->cpuinfo.max_freq = max_freq; -+ policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf); -+ policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf); - /* It will be updated by governor */ - policy->cur = policy->cpuinfo.min_freq; - - policy->driver_data = cpudata; - -- 
policy->min = policy->cpuinfo.min_freq; -- policy->max = policy->cpuinfo.max_freq; -- - policy->boost_enabled = READ_ONCE(cpudata->boost_supported); - - /* -@@ -1543,7 +1521,8 @@ static int amd_pstate_epp_update_limit(s - struct amd_cpudata *cpudata = policy->driver_data; - u8 epp; - -- amd_pstate_update_min_max_limit(policy); -+ if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) -+ amd_pstate_update_min_max_limit(policy); - - if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) - epp = 0; ---- a/drivers/cpufreq/amd-pstate.h -+++ b/drivers/cpufreq/amd-pstate.h -@@ -46,8 +46,6 @@ struct amd_aperf_mperf { - * @max_limit_perf: Cached value of the performance corresponding to policy->max - * @min_limit_freq: Cached value of policy->min (in khz) - * @max_limit_freq: Cached value of policy->max (in khz) -- * @max_freq: the frequency (in khz) that mapped to highest_perf -- * @min_freq: the frequency (in khz) that mapped to lowest_perf - * @nominal_freq: the frequency (in khz) that mapped to nominal_perf - * @lowest_nonlinear_freq: the frequency (in khz) that mapped to lowest_nonlinear_perf - * @cur: Difference of Aperf/Mperf/tsc count between last and current sample -@@ -77,11 +75,8 @@ struct amd_cpudata { - u8 prefcore_ranking; - u8 min_limit_perf; - u8 max_limit_perf; -- u32 min_limit_freq; -- u32 max_limit_freq; -- -- u32 max_freq; -- u32 min_freq; -+ u32 min_limit_freq; -+ u32 max_limit_freq; - u32 nominal_freq; - u32 lowest_nonlinear_freq; - diff --git a/debian/patches/patchset-pf/amd-pstate/0009-cpufreq-amd-pstate-Move-perf-values-into-a-union.patch b/debian/patches/patchset-pf/amd-pstate/0009-cpufreq-amd-pstate-Move-perf-values-into-a-union.patch deleted file mode 100644 index 386e106..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0009-cpufreq-amd-pstate-Move-perf-values-into-a-union.patch +++ /dev/null @@ -1,611 +0,0 @@ -From 21109b42429e0d9f0ee1bfadddae38fb5b0b23c3 Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Wed, 26 Feb 2025 01:49:19 -0600 -Subject: cpufreq/amd-pstate: Move perf values into a union - -By storing perf values in a union all the writes and reads can -be done atomically, removing the need for some concurrency protections. - -While making this change, also drop the cached frequency values, -using inline helpers to calculate them on demand from perf value. - -Reviewed-by: Gautham R. 
Shenoy -Signed-off-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate-ut.c | 18 +-- - drivers/cpufreq/amd-pstate.c | 205 ++++++++++++++++++-------------- - drivers/cpufreq/amd-pstate.h | 51 +++++--- - 3 files changed, 158 insertions(+), 116 deletions(-) - ---- a/drivers/cpufreq/amd-pstate-ut.c -+++ b/drivers/cpufreq/amd-pstate-ut.c -@@ -129,6 +129,7 @@ static void amd_pstate_ut_check_perf(u32 - struct cppc_perf_caps cppc_perf; - struct cpufreq_policy *policy = NULL; - struct amd_cpudata *cpudata = NULL; -+ union perf_cached cur_perf; - - for_each_possible_cpu(cpu) { - policy = cpufreq_cpu_get(cpu); -@@ -162,19 +163,20 @@ static void amd_pstate_ut_check_perf(u32 - lowest_perf = AMD_CPPC_LOWEST_PERF(cap1); - } - -- if (highest_perf != READ_ONCE(cpudata->highest_perf) && !cpudata->hw_prefcore) { -+ cur_perf = READ_ONCE(cpudata->perf); -+ if (highest_perf != cur_perf.highest_perf && !cpudata->hw_prefcore) { - pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n", -- __func__, cpu, highest_perf, cpudata->highest_perf); -+ __func__, cpu, highest_perf, cur_perf.highest_perf); - goto skip_test; - } -- if ((nominal_perf != READ_ONCE(cpudata->nominal_perf)) || -- (lowest_nonlinear_perf != READ_ONCE(cpudata->lowest_nonlinear_perf)) || -- (lowest_perf != READ_ONCE(cpudata->lowest_perf))) { -+ if (nominal_perf != cur_perf.nominal_perf || -+ (lowest_nonlinear_perf != cur_perf.lowest_nonlinear_perf) || -+ (lowest_perf != cur_perf.lowest_perf)) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s cpu%d nominal=%d %d lowest_nonlinear=%d %d lowest=%d %d, they should be equal!\n", -- __func__, cpu, nominal_perf, cpudata->nominal_perf, -- lowest_nonlinear_perf, cpudata->lowest_nonlinear_perf, -- lowest_perf, cpudata->lowest_perf); -+ __func__, cpu, nominal_perf, cur_perf.nominal_perf, -+ lowest_nonlinear_perf, cur_perf.lowest_nonlinear_perf, -+ lowest_perf, cur_perf.lowest_perf); - goto skip_test; - } - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -142,18 +142,17 @@ static struct quirk_entry quirk_amd_7k62 - .lowest_freq = 550, - }; - --static inline u8 freq_to_perf(struct amd_cpudata *cpudata, unsigned int freq_val) -+static inline u8 freq_to_perf(union perf_cached perf, u32 nominal_freq, unsigned int freq_val) - { -- u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf, -- cpudata->nominal_freq); -+ u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * perf.nominal_perf, nominal_freq); - -- return (u8)clamp(perf_val, cpudata->lowest_perf, cpudata->highest_perf); -+ return (u8)clamp(perf_val, perf.lowest_perf, perf.highest_perf); - } - --static inline u32 perf_to_freq(struct amd_cpudata *cpudata, u8 perf_val) -+static inline u32 perf_to_freq(union perf_cached perf, u32 nominal_freq, u8 perf_val) - { -- return DIV_ROUND_UP_ULL((u64)cpudata->nominal_freq * perf_val, -- cpudata->nominal_perf); -+ return DIV_ROUND_UP_ULL((u64)nominal_freq * perf_val, -+ perf.nominal_perf); - } - - static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi) -@@ -347,7 +346,9 @@ static int amd_pstate_set_energy_pref_in - } - - if (trace_amd_pstate_epp_perf_enabled()) { -- trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, -+ union perf_cached perf = READ_ONCE(cpudata->perf); -+ -+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, - epp, - FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), - FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), -@@ -425,6 +426,7 @@ static inline int 
amd_pstate_cppc_enable - - static int msr_init_perf(struct amd_cpudata *cpudata) - { -+ union perf_cached perf = READ_ONCE(cpudata->perf); - u64 cap1, numerator; - - int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, -@@ -436,19 +438,21 @@ static int msr_init_perf(struct amd_cpud - if (ret) - return ret; - -- WRITE_ONCE(cpudata->highest_perf, numerator); -- WRITE_ONCE(cpudata->max_limit_perf, numerator); -- WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); -- WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); -- WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); -+ perf.highest_perf = numerator; -+ perf.max_limit_perf = numerator; -+ perf.min_limit_perf = AMD_CPPC_LOWEST_PERF(cap1); -+ perf.nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1); -+ perf.lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1); -+ perf.lowest_perf = AMD_CPPC_LOWEST_PERF(cap1); -+ WRITE_ONCE(cpudata->perf, perf); - WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1)); -- WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1)); - return 0; - } - - static int shmem_init_perf(struct amd_cpudata *cpudata) - { - struct cppc_perf_caps cppc_perf; -+ union perf_cached perf = READ_ONCE(cpudata->perf); - u64 numerator; - - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); -@@ -459,14 +463,14 @@ static int shmem_init_perf(struct amd_cp - if (ret) - return ret; - -- WRITE_ONCE(cpudata->highest_perf, numerator); -- WRITE_ONCE(cpudata->max_limit_perf, numerator); -- WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf); -- WRITE_ONCE(cpudata->lowest_nonlinear_perf, -- cppc_perf.lowest_nonlinear_perf); -- WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); -+ perf.highest_perf = numerator; -+ perf.max_limit_perf = numerator; -+ perf.min_limit_perf = cppc_perf.lowest_perf; -+ perf.nominal_perf = cppc_perf.nominal_perf; -+ perf.lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; -+ perf.lowest_perf = cppc_perf.lowest_perf; -+ WRITE_ONCE(cpudata->perf, perf); - WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf); -- WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf); - - if (cppc_state == AMD_PSTATE_ACTIVE) - return 0; -@@ -549,14 +553,14 @@ static void amd_pstate_update(struct amd - u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags) - { - struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu); -- u8 nominal_perf = READ_ONCE(cpudata->nominal_perf); -+ union perf_cached perf = READ_ONCE(cpudata->perf); - - if (!policy) - return; - - des_perf = clamp_t(u8, des_perf, min_perf, max_perf); - -- policy->cur = perf_to_freq(cpudata, des_perf); -+ policy->cur = perf_to_freq(perf, cpudata->nominal_freq, des_perf); - - if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { - min_perf = des_perf; -@@ -565,7 +569,7 @@ static void amd_pstate_update(struct amd - - /* limit the max perf when core performance boost feature is disabled */ - if (!cpudata->boost_supported) -- max_perf = min_t(u8, nominal_perf, max_perf); -+ max_perf = min_t(u8, perf.nominal_perf, max_perf); - - if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) { - trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq, -@@ -602,39 +606,41 @@ static int amd_pstate_verify(struct cpuf - return 0; - } - --static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) -+static void amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) - { -- u8 max_limit_perf, 
min_limit_perf; - struct amd_cpudata *cpudata = policy->driver_data; -+ union perf_cached perf = READ_ONCE(cpudata->perf); - -- max_limit_perf = freq_to_perf(cpudata, policy->max); -- min_limit_perf = freq_to_perf(cpudata, policy->min); -+ perf.max_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->max); -+ perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min); - - if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) -- min_limit_perf = min(cpudata->nominal_perf, max_limit_perf); -+ perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf); - -- WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); -- WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); - WRITE_ONCE(cpudata->max_limit_freq, policy->max); - WRITE_ONCE(cpudata->min_limit_freq, policy->min); -- -- return 0; -+ WRITE_ONCE(cpudata->perf, perf); - } - - static int amd_pstate_update_freq(struct cpufreq_policy *policy, - unsigned int target_freq, bool fast_switch) - { - struct cpufreq_freqs freqs; -- struct amd_cpudata *cpudata = policy->driver_data; -+ struct amd_cpudata *cpudata; -+ union perf_cached perf; - u8 des_perf; - -+ cpudata = policy->driver_data; -+ - if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) - amd_pstate_update_min_max_limit(policy); - -+ perf = READ_ONCE(cpudata->perf); -+ - freqs.old = policy->cur; - freqs.new = target_freq; - -- des_perf = freq_to_perf(cpudata, target_freq); -+ des_perf = freq_to_perf(perf, cpudata->nominal_freq, target_freq); - - WARN_ON(fast_switch && !policy->fast_switch_enabled); - /* -@@ -645,8 +651,8 @@ static int amd_pstate_update_freq(struct - if (!fast_switch) - cpufreq_freq_transition_begin(policy, &freqs); - -- amd_pstate_update(cpudata, cpudata->min_limit_perf, des_perf, -- cpudata->max_limit_perf, fast_switch, -+ amd_pstate_update(cpudata, perf.min_limit_perf, des_perf, -+ perf.max_limit_perf, fast_switch, - policy->governor->flags); - - if (!fast_switch) -@@ -675,9 +681,10 @@ static void amd_pstate_adjust_perf(unsig - unsigned long target_perf, - unsigned long capacity) - { -- u8 max_perf, min_perf, des_perf, cap_perf, min_limit_perf; -+ u8 max_perf, min_perf, des_perf, cap_perf; - struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); - struct amd_cpudata *cpudata; -+ union perf_cached perf; - - if (!policy) - return; -@@ -687,8 +694,8 @@ static void amd_pstate_adjust_perf(unsig - if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) - amd_pstate_update_min_max_limit(policy); - -- cap_perf = READ_ONCE(cpudata->highest_perf); -- min_limit_perf = READ_ONCE(cpudata->min_limit_perf); -+ perf = READ_ONCE(cpudata->perf); -+ cap_perf = perf.highest_perf; - - des_perf = cap_perf; - if (target_perf < capacity) -@@ -699,10 +706,10 @@ static void amd_pstate_adjust_perf(unsig - else - min_perf = cap_perf; - -- if (min_perf < min_limit_perf) -- min_perf = min_limit_perf; -+ if (min_perf < perf.min_limit_perf) -+ min_perf = perf.min_limit_perf; - -- max_perf = cpudata->max_limit_perf; -+ max_perf = perf.max_limit_perf; - if (max_perf < min_perf) - max_perf = min_perf; - -@@ -713,11 +720,12 @@ static void amd_pstate_adjust_perf(unsig - static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) - { - struct amd_cpudata *cpudata = policy->driver_data; -+ union perf_cached perf = READ_ONCE(cpudata->perf); - u32 nominal_freq, max_freq; - int ret = 0; - - nominal_freq = READ_ONCE(cpudata->nominal_freq); -- max_freq = perf_to_freq(cpudata, 
READ_ONCE(cpudata->highest_perf)); -+ max_freq = perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf); - - if (on) - policy->cpuinfo.max_freq = max_freq; -@@ -881,30 +889,30 @@ static u32 amd_pstate_get_transition_lat - } - - /* -- * amd_pstate_init_freq: Initialize the max_freq, min_freq, -- * nominal_freq and lowest_nonlinear_freq for -- * the @cpudata object. -+ * amd_pstate_init_freq: Initialize the nominal_freq and lowest_nonlinear_freq -+ * for the @cpudata object. - * -- * Requires: highest_perf, lowest_perf, nominal_perf and -- * lowest_nonlinear_perf members of @cpudata to be -- * initialized. -+ * Requires: all perf members of @cpudata to be initialized. - * -- * Returns 0 on success, non-zero value on failure. -+ * Returns 0 on success, non-zero value on failure. - */ - static int amd_pstate_init_freq(struct amd_cpudata *cpudata) - { -- int ret; -- u32 min_freq, max_freq; -- u32 nominal_freq, lowest_nonlinear_freq; -+ u32 min_freq, max_freq, nominal_freq, lowest_nonlinear_freq; - struct cppc_perf_caps cppc_perf; -+ union perf_cached perf; -+ int ret; - - ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) - return ret; -+ perf = READ_ONCE(cpudata->perf); - -- if (quirks && quirks->lowest_freq) -+ if (quirks && quirks->lowest_freq) { - min_freq = quirks->lowest_freq; -- else -+ perf.lowest_perf = freq_to_perf(perf, nominal_freq, min_freq); -+ WRITE_ONCE(cpudata->perf, perf); -+ } else - min_freq = cppc_perf.lowest_freq; - - if (quirks && quirks->nominal_freq) -@@ -917,8 +925,8 @@ static int amd_pstate_init_freq(struct a - - WRITE_ONCE(cpudata->nominal_freq, nominal_freq); - -- max_freq = perf_to_freq(cpudata, cpudata->highest_perf); -- lowest_nonlinear_freq = perf_to_freq(cpudata, cpudata->lowest_nonlinear_perf); -+ max_freq = perf_to_freq(perf, nominal_freq, perf.highest_perf); -+ lowest_nonlinear_freq = perf_to_freq(perf, nominal_freq, perf.lowest_nonlinear_perf); - WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq); - - /** -@@ -945,6 +953,7 @@ static int amd_pstate_init_freq(struct a - static int amd_pstate_cpu_init(struct cpufreq_policy *policy) - { - struct amd_cpudata *cpudata; -+ union perf_cached perf; - struct device *dev; - int ret; - -@@ -980,8 +989,14 @@ static int amd_pstate_cpu_init(struct cp - policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu); - policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu); - -- policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf); -- policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf); -+ perf = READ_ONCE(cpudata->perf); -+ -+ policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf, -+ cpudata->nominal_freq, -+ perf.lowest_perf); -+ policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf, -+ cpudata->nominal_freq, -+ perf.highest_perf); - - policy->boost_enabled = READ_ONCE(cpudata->boost_supported); - -@@ -1062,23 +1077,27 @@ static int amd_pstate_cpu_suspend(struct - static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, - char *buf) - { -- struct amd_cpudata *cpudata = policy->driver_data; -+ struct amd_cpudata *cpudata; -+ union perf_cached perf; - -+ cpudata = policy->driver_data; -+ perf = READ_ONCE(cpudata->perf); - -- return sysfs_emit(buf, "%u\n", perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf))); -+ return sysfs_emit(buf, "%u\n", -+ perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf)); - } - - static ssize_t 
show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy, - char *buf) - { -- int freq; -- struct amd_cpudata *cpudata = policy->driver_data; -+ struct amd_cpudata *cpudata; -+ union perf_cached perf; - -- freq = READ_ONCE(cpudata->lowest_nonlinear_freq); -- if (freq < 0) -- return freq; -+ cpudata = policy->driver_data; -+ perf = READ_ONCE(cpudata->perf); - -- return sysfs_emit(buf, "%u\n", freq); -+ return sysfs_emit(buf, "%u\n", -+ perf_to_freq(perf, cpudata->nominal_freq, perf.lowest_nonlinear_perf)); - } - - /* -@@ -1088,12 +1107,11 @@ static ssize_t show_amd_pstate_lowest_no - static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, - char *buf) - { -- u8 perf; -- struct amd_cpudata *cpudata = policy->driver_data; -+ struct amd_cpudata *cpudata; - -- perf = READ_ONCE(cpudata->highest_perf); -+ cpudata = policy->driver_data; - -- return sysfs_emit(buf, "%u\n", perf); -+ return sysfs_emit(buf, "%u\n", cpudata->perf.highest_perf); - } - - static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy, -@@ -1424,6 +1442,7 @@ static bool amd_pstate_acpi_pm_profile_u - static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) - { - struct amd_cpudata *cpudata; -+ union perf_cached perf; - struct device *dev; - u64 value; - int ret; -@@ -1457,8 +1476,15 @@ static int amd_pstate_epp_cpu_init(struc - if (ret) - goto free_cpudata1; - -- policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf); -- policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf); -+ perf = READ_ONCE(cpudata->perf); -+ -+ policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf, -+ cpudata->nominal_freq, -+ perf.lowest_perf); -+ policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf, -+ cpudata->nominal_freq, -+ perf.highest_perf); -+ - /* It will be updated by governor */ - policy->cur = policy->cpuinfo.min_freq; - -@@ -1519,6 +1545,7 @@ static void amd_pstate_epp_cpu_exit(stru - static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) - { - struct amd_cpudata *cpudata = policy->driver_data; -+ union perf_cached perf; - u8 epp; - - if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) -@@ -1529,15 +1556,16 @@ static int amd_pstate_epp_update_limit(s - else - epp = READ_ONCE(cpudata->epp_cached); - -+ perf = READ_ONCE(cpudata->perf); - if (trace_amd_pstate_epp_perf_enabled()) { -- trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp, -- cpudata->min_limit_perf, -- cpudata->max_limit_perf, -+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, epp, -+ perf.min_limit_perf, -+ perf.max_limit_perf, - policy->boost_enabled); - } - -- return amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, -- cpudata->max_limit_perf, epp, false); -+ return amd_pstate_update_perf(cpudata, perf.min_limit_perf, 0U, -+ perf.max_limit_perf, epp, false); - } - - static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) -@@ -1569,20 +1597,18 @@ static int amd_pstate_epp_set_policy(str - static int amd_pstate_epp_reenable(struct cpufreq_policy *policy) - { - struct amd_cpudata *cpudata = policy->driver_data; -- u8 max_perf; -+ union perf_cached perf = READ_ONCE(cpudata->perf); - int ret; - - ret = amd_pstate_cppc_enable(true); - if (ret) - pr_err("failed to enable amd pstate during resume, return %d\n", ret); - -- max_perf = READ_ONCE(cpudata->highest_perf); -- - if (trace_amd_pstate_epp_perf_enabled()) { -- trace_amd_pstate_epp_perf(cpudata->cpu, 
cpudata->highest_perf, -+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, - cpudata->epp_cached, - FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), -- max_perf, policy->boost_enabled); -+ perf.highest_perf, policy->boost_enabled); - } - - return amd_pstate_epp_update_limit(policy); -@@ -1606,22 +1632,21 @@ static int amd_pstate_epp_cpu_online(str - static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) - { - struct amd_cpudata *cpudata = policy->driver_data; -- u8 min_perf; -+ union perf_cached perf = READ_ONCE(cpudata->perf); - - if (cpudata->suspended) - return 0; - -- min_perf = READ_ONCE(cpudata->lowest_perf); -- - guard(mutex)(&amd_pstate_limits_lock); - - if (trace_amd_pstate_epp_perf_enabled()) { -- trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, -+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, - AMD_CPPC_EPP_BALANCE_POWERSAVE, -- min_perf, min_perf, policy->boost_enabled); -+ perf.lowest_perf, perf.lowest_perf, -+ policy->boost_enabled); - } - -- return amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, -+ return amd_pstate_update_perf(cpudata, perf.lowest_perf, 0, perf.lowest_perf, - AMD_CPPC_EPP_BALANCE_POWERSAVE, false); - } - ---- a/drivers/cpufreq/amd-pstate.h -+++ b/drivers/cpufreq/amd-pstate.h -@@ -13,6 +13,36 @@ - /********************************************************************* - * AMD P-state INTERFACE * - *********************************************************************/ -+ -+/** -+ * union perf_cached - A union to cache performance-related data. -+ * @highest_perf: the maximum performance an individual processor may reach, -+ * assuming ideal conditions -+ * For platforms that support the preferred core feature, the highest_perf value maybe -+ * configured to any value in the range 166-255 by the firmware (because the preferred -+ * core ranking is encoded in the highest_perf value). To maintain consistency across -+ * all platforms, we split the highest_perf and preferred core ranking values into -+ * cpudata->perf.highest_perf and cpudata->prefcore_ranking. -+ * @nominal_perf: the maximum sustained performance level of the processor, -+ * assuming ideal operating conditions -+ * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power -+ * savings are achieved -+ * @lowest_perf: the absolute lowest performance level of the processor -+ * @min_limit_perf: Cached value of the performance corresponding to policy->min -+ * @max_limit_perf: Cached value of the performance corresponding to policy->max -+ */ -+union perf_cached { -+ struct { -+ u8 highest_perf; -+ u8 nominal_perf; -+ u8 lowest_nonlinear_perf; -+ u8 lowest_perf; -+ u8 min_limit_perf; -+ u8 max_limit_perf; -+ }; -+ u64 val; -+}; -+ - /** - * struct amd_aperf_mperf - * @aperf: actual performance frequency clock count -@@ -30,20 +60,9 @@ struct amd_aperf_mperf { - * @cpu: CPU number - * @req: constraint request to apply - * @cppc_req_cached: cached performance request hints -- * @highest_perf: the maximum performance an individual processor may reach, -- * assuming ideal conditions -- * For platforms that do not support the preferred core feature, the -- * highest_pef may be configured with 166 or 255, to avoid max frequency -- * calculated wrongly. we take the fixed value as the highest_perf. 
-- * @nominal_perf: the maximum sustained performance level of the processor, -- * assuming ideal operating conditions -- * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power -- * savings are achieved -- * @lowest_perf: the absolute lowest performance level of the processor -+ * @perf: cached performance-related data - * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher - * priority. -- * @min_limit_perf: Cached value of the performance corresponding to policy->min -- * @max_limit_perf: Cached value of the performance corresponding to policy->max - * @min_limit_freq: Cached value of policy->min (in khz) - * @max_limit_freq: Cached value of policy->max (in khz) - * @nominal_freq: the frequency (in khz) that mapped to nominal_perf -@@ -68,13 +87,9 @@ struct amd_cpudata { - struct freq_qos_request req[2]; - u64 cppc_req_cached; - -- u8 highest_perf; -- u8 nominal_perf; -- u8 lowest_nonlinear_perf; -- u8 lowest_perf; -+ union perf_cached perf; -+ - u8 prefcore_ranking; -- u8 min_limit_perf; -- u8 max_limit_perf; - u32 min_limit_freq; - u32 max_limit_freq; - u32 nominal_freq; diff --git a/debian/patches/patchset-pf/amd-pstate/0010-cpufreq-amd-pstate-Overhaul-locking.patch b/debian/patches/patchset-pf/amd-pstate/0010-cpufreq-amd-pstate-Overhaul-locking.patch deleted file mode 100644 index 33c5ab3..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0010-cpufreq-amd-pstate-Overhaul-locking.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 0daee82069cfe4a322bed954a4a5f19226e49e95 Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Wed, 26 Feb 2025 01:49:20 -0600 -Subject: cpufreq/amd-pstate: Overhaul locking - -amd_pstate_cpu_boost_update() and refresh_frequency_limits() both -update the policy state and have nothing to do with the amd-pstate -driver itself. - -A global "limits" lock doesn't make sense because each CPU can have -policies changed independently. Each time a CPU changes values they -will atomically be written to the per-CPU perf member. Drop per CPU -locking cases. - -The remaining "global" driver lock is used to ensure that only one -entity can change driver modes at a given time. - -Reviewed-by: Gautham R. Shenoy -Reviewed-by: Dhananjay Ugwekar -Signed-off-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate.c | 13 +++---------- - 1 file changed, 3 insertions(+), 10 deletions(-) - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -196,7 +196,6 @@ static inline int get_mode_idx_from_str( - return -EINVAL; - } - --static DEFINE_MUTEX(amd_pstate_limits_lock); - static DEFINE_MUTEX(amd_pstate_driver_lock); - - static u8 msr_get_epp(struct amd_cpudata *cpudata) -@@ -1169,8 +1168,6 @@ static ssize_t store_energy_performance_ - if (ret < 0) - return -EINVAL; - -- guard(mutex)(&amd_pstate_limits_lock); -- - ret = amd_pstate_set_energy_pref_index(policy, ret); - - return ret ? ret : count; -@@ -1343,8 +1340,10 @@ int amd_pstate_update_status(const char - if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX) - return -EINVAL; - -- if (mode_state_machine[cppc_state][mode_idx]) -+ if (mode_state_machine[cppc_state][mode_idx]) { -+ guard(mutex)(&amd_pstate_driver_lock); - return mode_state_machine[cppc_state][mode_idx](mode_idx); -+ } - - return 0; - } -@@ -1365,7 +1364,6 @@ static ssize_t status_store(struct devic - char *p = memchr(buf, '\n', count); - int ret; - -- guard(mutex)(&amd_pstate_driver_lock); - ret = amd_pstate_update_status(buf, p ? p - buf : count); - - return ret < 0 ? 
ret : count; -@@ -1637,8 +1635,6 @@ static int amd_pstate_epp_cpu_offline(st - if (cpudata->suspended) - return 0; - -- guard(mutex)(&amd_pstate_limits_lock); -- - if (trace_amd_pstate_epp_perf_enabled()) { - trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, - AMD_CPPC_EPP_BALANCE_POWERSAVE, -@@ -1678,8 +1674,6 @@ static int amd_pstate_epp_resume(struct - struct amd_cpudata *cpudata = policy->driver_data; - - if (cpudata->suspended) { -- guard(mutex)(&amd_pstate_limits_lock); -- - /* enable amd pstate from suspend state*/ - amd_pstate_epp_reenable(policy); - diff --git a/debian/patches/patchset-pf/amd-pstate/0011-cpufreq-amd-pstate-Drop-cppc_cap1_cached.patch b/debian/patches/patchset-pf/amd-pstate/0011-cpufreq-amd-pstate-Drop-cppc_cap1_cached.patch deleted file mode 100644 index 3492d81..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0011-cpufreq-amd-pstate-Drop-cppc_cap1_cached.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 7c820a91ffd02aa7e426e8801893575f218a7a80 Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Wed, 26 Feb 2025 01:49:21 -0600 -Subject: cpufreq/amd-pstate: Drop `cppc_cap1_cached` - -The `cppc_cap1_cached` variable isn't used at all, there is no -need to read it at initialization for each CPU. - -Reviewed-by: Gautham R. Shenoy -Reviewed-by: Dhananjay Ugwekar -Signed-off-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate.c | 5 ----- - drivers/cpufreq/amd-pstate.h | 2 -- - 2 files changed, 7 deletions(-) - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -1508,11 +1508,6 @@ static int amd_pstate_epp_cpu_init(struc - if (ret) - return ret; - WRITE_ONCE(cpudata->cppc_req_cached, value); -- -- ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value); -- if (ret) -- return ret; -- WRITE_ONCE(cpudata->cppc_cap1_cached, value); - } - ret = amd_pstate_set_epp(cpudata, cpudata->epp_default); - if (ret) ---- a/drivers/cpufreq/amd-pstate.h -+++ b/drivers/cpufreq/amd-pstate.h -@@ -76,7 +76,6 @@ struct amd_aperf_mperf { - * AMD P-State driver supports preferred core featue. - * @epp_cached: Cached CPPC energy-performance preference value - * @policy: Cpufreq policy value -- * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value - * - * The amd_cpudata is key private data for each CPU thread in AMD P-State, and - * represents all the attributes and goals that AMD P-State requests at runtime. -@@ -105,7 +104,6 @@ struct amd_cpudata { - /* EPP feature related attributes*/ - u8 epp_cached; - u32 policy; -- u64 cppc_cap1_cached; - bool suspended; - u8 epp_default; - }; diff --git a/debian/patches/patchset-pf/amd-pstate/0012-cpufreq-amd-pstate-ut-Use-_free-macro-to-free-put-po.patch b/debian/patches/patchset-pf/amd-pstate/0012-cpufreq-amd-pstate-ut-Use-_free-macro-to-free-put-po.patch deleted file mode 100644 index d002230..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0012-cpufreq-amd-pstate-ut-Use-_free-macro-to-free-put-po.patch +++ /dev/null @@ -1,144 +0,0 @@ -From 5d0c340db98de378a11abfbaf587b6e601e7291c Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Wed, 26 Feb 2025 01:49:22 -0600 -Subject: cpufreq/amd-pstate-ut: Use _free macro to free put policy - -Using a scoped cleanup macro simplifies cleanup code. - -Reviewed-by: Dhananjay Ugwekar -Reviewed-by: Gautham R. 
Shenoy -Signed-off-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate-ut.c | 33 ++++++++++++++------------------- - 1 file changed, 14 insertions(+), 19 deletions(-) - ---- a/drivers/cpufreq/amd-pstate-ut.c -+++ b/drivers/cpufreq/amd-pstate-ut.c -@@ -26,6 +26,7 @@ - #include - #include - #include -+#include - - #include - -@@ -127,11 +128,12 @@ static void amd_pstate_ut_check_perf(u32 - u32 highest_perf = 0, nominal_perf = 0, lowest_nonlinear_perf = 0, lowest_perf = 0; - u64 cap1 = 0; - struct cppc_perf_caps cppc_perf; -- struct cpufreq_policy *policy = NULL; - struct amd_cpudata *cpudata = NULL; - union perf_cached cur_perf; - - for_each_possible_cpu(cpu) { -+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL; -+ - policy = cpufreq_cpu_get(cpu); - if (!policy) - break; -@@ -142,7 +144,7 @@ static void amd_pstate_ut_check_perf(u32 - if (ret) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret); -- goto skip_test; -+ return; - } - - highest_perf = cppc_perf.highest_perf; -@@ -154,7 +156,7 @@ static void amd_pstate_ut_check_perf(u32 - if (ret) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret); -- goto skip_test; -+ return; - } - - highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); -@@ -167,7 +169,7 @@ static void amd_pstate_ut_check_perf(u32 - if (highest_perf != cur_perf.highest_perf && !cpudata->hw_prefcore) { - pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n", - __func__, cpu, highest_perf, cur_perf.highest_perf); -- goto skip_test; -+ return; - } - if (nominal_perf != cur_perf.nominal_perf || - (lowest_nonlinear_perf != cur_perf.lowest_nonlinear_perf) || -@@ -177,7 +179,7 @@ static void amd_pstate_ut_check_perf(u32 - __func__, cpu, nominal_perf, cur_perf.nominal_perf, - lowest_nonlinear_perf, cur_perf.lowest_nonlinear_perf, - lowest_perf, cur_perf.lowest_perf); -- goto skip_test; -+ return; - } - - if (!((highest_perf >= nominal_perf) && -@@ -188,15 +190,11 @@ static void amd_pstate_ut_check_perf(u32 - pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n", - __func__, cpu, highest_perf, nominal_perf, - lowest_nonlinear_perf, lowest_perf); -- goto skip_test; -+ return; - } -- cpufreq_cpu_put(policy); - } - - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; -- return; --skip_test: -- cpufreq_cpu_put(policy); - } - - /* -@@ -207,10 +205,11 @@ skip_test: - static void amd_pstate_ut_check_freq(u32 index) - { - int cpu = 0; -- struct cpufreq_policy *policy = NULL; - struct amd_cpudata *cpudata = NULL; - - for_each_possible_cpu(cpu) { -+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL; -+ - policy = cpufreq_cpu_get(cpu); - if (!policy) - break; -@@ -224,14 +223,14 @@ static void amd_pstate_ut_check_freq(u32 - pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n", - __func__, cpu, policy->cpuinfo.max_freq, cpudata->nominal_freq, - cpudata->lowest_nonlinear_freq, policy->cpuinfo.min_freq); -- goto skip_test; -+ return; - } - - if (cpudata->lowest_nonlinear_freq != policy->min) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s cpu%d cpudata_lowest_nonlinear_freq=%d policy_min=%d, they should be equal!\n", - __func__, cpu, cpudata->lowest_nonlinear_freq, policy->min); -- goto skip_test; -+ return; - } - - if (cpudata->boost_supported) { -@@ -243,20 +242,16 @@ 
static void amd_pstate_ut_check_freq(u32 - pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n", - __func__, cpu, policy->max, policy->cpuinfo.max_freq, - cpudata->nominal_freq); -- goto skip_test; -+ return; - } - } else { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s cpu%d must support boost!\n", __func__, cpu); -- goto skip_test; -+ return; - } -- cpufreq_cpu_put(policy); - } - - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; -- return; --skip_test: -- cpufreq_cpu_put(policy); - } - - static int amd_pstate_set_mode(enum amd_pstate_mode mode) diff --git a/debian/patches/patchset-pf/amd-pstate/0013-cpufreq-amd-pstate-ut-Allow-lowest-nonlinear-and-low.patch b/debian/patches/patchset-pf/amd-pstate/0013-cpufreq-amd-pstate-ut-Allow-lowest-nonlinear-and-low.patch deleted file mode 100644 index 023ade7..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0013-cpufreq-amd-pstate-ut-Allow-lowest-nonlinear-and-low.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 8937b7068ca30072c4c4cf4c22000112afbd6839 Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Wed, 26 Feb 2025 01:49:23 -0600 -Subject: cpufreq/amd-pstate-ut: Allow lowest nonlinear and lowest to be the - same - -Several Ryzen AI processors support the exact same value for lowest -nonlinear perf and lowest perf. Loosen up the unit tests to allow this -scenario. - -Reviewed-by: Gautham R. Shenoy -Reviewed-by: Dhananjay Ugwekar -Signed-off-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate-ut.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/drivers/cpufreq/amd-pstate-ut.c -+++ b/drivers/cpufreq/amd-pstate-ut.c -@@ -184,7 +184,7 @@ static void amd_pstate_ut_check_perf(u32 - - if (!((highest_perf >= nominal_perf) && - (nominal_perf > lowest_nonlinear_perf) && -- (lowest_nonlinear_perf > lowest_perf) && -+ (lowest_nonlinear_perf >= lowest_perf) && - (lowest_perf > 0))) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n", -@@ -217,7 +217,7 @@ static void amd_pstate_ut_check_freq(u32 - - if (!((policy->cpuinfo.max_freq >= cpudata->nominal_freq) && - (cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) && -- (cpudata->lowest_nonlinear_freq > policy->cpuinfo.min_freq) && -+ (cpudata->lowest_nonlinear_freq >= policy->cpuinfo.min_freq) && - (policy->cpuinfo.min_freq > 0))) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n", diff --git a/debian/patches/patchset-pf/amd-pstate/0014-cpufreq-amd-pstate-ut-Drop-SUCCESS-and-FAIL-enums.patch b/debian/patches/patchset-pf/amd-pstate/0014-cpufreq-amd-pstate-ut-Drop-SUCCESS-and-FAIL-enums.patch deleted file mode 100644 index 465ff36..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0014-cpufreq-amd-pstate-ut-Drop-SUCCESS-and-FAIL-enums.patch +++ /dev/null @@ -1,309 +0,0 @@ -From 8cb701e059fa08dcb9ab74e3c84abc224ff72714 Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Wed, 26 Feb 2025 01:49:24 -0600 -Subject: cpufreq/amd-pstate-ut: Drop SUCCESS and FAIL enums - -Enums are effectively used as a boolean and don't show -the return value of the failing call. - -Instead of using enums switch to returning the actual return -code from the unit test. - -Reviewed-by: Gautham R. 
Shenoy -Reviewed-by: Dhananjay Ugwekar -Signed-off-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate-ut.c | 143 ++++++++++++-------------------- - 1 file changed, 55 insertions(+), 88 deletions(-) - ---- a/drivers/cpufreq/amd-pstate-ut.c -+++ b/drivers/cpufreq/amd-pstate-ut.c -@@ -32,30 +32,20 @@ - - #include "amd-pstate.h" - --/* -- * Abbreviations: -- * amd_pstate_ut: used as a shortform for AMD P-State unit test. -- * It helps to keep variable names smaller, simpler -- */ --enum amd_pstate_ut_result { -- AMD_PSTATE_UT_RESULT_PASS, -- AMD_PSTATE_UT_RESULT_FAIL, --}; - - struct amd_pstate_ut_struct { - const char *name; -- void (*func)(u32 index); -- enum amd_pstate_ut_result result; -+ int (*func)(u32 index); - }; - - /* - * Kernel module for testing the AMD P-State unit test - */ --static void amd_pstate_ut_acpi_cpc_valid(u32 index); --static void amd_pstate_ut_check_enabled(u32 index); --static void amd_pstate_ut_check_perf(u32 index); --static void amd_pstate_ut_check_freq(u32 index); --static void amd_pstate_ut_check_driver(u32 index); -+static int amd_pstate_ut_acpi_cpc_valid(u32 index); -+static int amd_pstate_ut_check_enabled(u32 index); -+static int amd_pstate_ut_check_perf(u32 index); -+static int amd_pstate_ut_check_freq(u32 index); -+static int amd_pstate_ut_check_driver(u32 index); - - static struct amd_pstate_ut_struct amd_pstate_ut_cases[] = { - {"amd_pstate_ut_acpi_cpc_valid", amd_pstate_ut_acpi_cpc_valid }, -@@ -78,51 +68,46 @@ static bool get_shared_mem(void) - /* - * check the _CPC object is present in SBIOS. - */ --static void amd_pstate_ut_acpi_cpc_valid(u32 index) -+static int amd_pstate_ut_acpi_cpc_valid(u32 index) - { -- if (acpi_cpc_valid()) -- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; -- else { -- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; -+ if (!acpi_cpc_valid()) { - pr_err("%s the _CPC object is not present in SBIOS!\n", __func__); -+ return -EINVAL; - } -+ -+ return 0; - } - --static void amd_pstate_ut_pstate_enable(u32 index) -+/* -+ * check if amd pstate is enabled -+ */ -+static int amd_pstate_ut_check_enabled(u32 index) - { -- int ret = 0; - u64 cppc_enable = 0; -+ int ret; -+ -+ if (get_shared_mem()) -+ return 0; - - ret = rdmsrl_safe(MSR_AMD_CPPC_ENABLE, &cppc_enable); - if (ret) { -- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s rdmsrl_safe MSR_AMD_CPPC_ENABLE ret=%d error!\n", __func__, ret); -- return; -+ return ret; - } -- if (cppc_enable) -- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; -- else { -- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; -+ -+ if (!cppc_enable) { - pr_err("%s amd pstate must be enabled!\n", __func__); -+ return -EINVAL; - } --} - --/* -- * check if amd pstate is enabled -- */ --static void amd_pstate_ut_check_enabled(u32 index) --{ -- if (get_shared_mem()) -- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; -- else -- amd_pstate_ut_pstate_enable(index); -+ return 0; - } - - /* - * check if performance values are reasonable. 
- * highest_perf >= nominal_perf > lowest_nonlinear_perf > lowest_perf > 0 - */ --static void amd_pstate_ut_check_perf(u32 index) -+static int amd_pstate_ut_check_perf(u32 index) - { - int cpu = 0, ret = 0; - u32 highest_perf = 0, nominal_perf = 0, lowest_nonlinear_perf = 0, lowest_perf = 0; -@@ -142,9 +127,8 @@ static void amd_pstate_ut_check_perf(u32 - if (get_shared_mem()) { - ret = cppc_get_perf_caps(cpu, &cppc_perf); - if (ret) { -- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret); -- return; -+ return ret; - } - - highest_perf = cppc_perf.highest_perf; -@@ -154,9 +138,8 @@ static void amd_pstate_ut_check_perf(u32 - } else { - ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1); - if (ret) { -- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret); -- return; -+ return ret; - } - - highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); -@@ -169,32 +152,30 @@ static void amd_pstate_ut_check_perf(u32 - if (highest_perf != cur_perf.highest_perf && !cpudata->hw_prefcore) { - pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n", - __func__, cpu, highest_perf, cur_perf.highest_perf); -- return; -+ return -EINVAL; - } - if (nominal_perf != cur_perf.nominal_perf || - (lowest_nonlinear_perf != cur_perf.lowest_nonlinear_perf) || - (lowest_perf != cur_perf.lowest_perf)) { -- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s cpu%d nominal=%d %d lowest_nonlinear=%d %d lowest=%d %d, they should be equal!\n", - __func__, cpu, nominal_perf, cur_perf.nominal_perf, - lowest_nonlinear_perf, cur_perf.lowest_nonlinear_perf, - lowest_perf, cur_perf.lowest_perf); -- return; -+ return -EINVAL; - } - - if (!((highest_perf >= nominal_perf) && - (nominal_perf > lowest_nonlinear_perf) && - (lowest_nonlinear_perf >= lowest_perf) && - (lowest_perf > 0))) { -- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n", - __func__, cpu, highest_perf, nominal_perf, - lowest_nonlinear_perf, lowest_perf); -- return; -+ return -EINVAL; - } - } - -- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; -+ return 0; - } - - /* -@@ -202,7 +183,7 @@ static void amd_pstate_ut_check_perf(u32 - * max_freq >= nominal_freq > lowest_nonlinear_freq > min_freq > 0 - * check max freq when set support boost mode. 
- */ --static void amd_pstate_ut_check_freq(u32 index) -+static int amd_pstate_ut_check_freq(u32 index) - { - int cpu = 0; - struct amd_cpudata *cpudata = NULL; -@@ -219,39 +200,33 @@ static void amd_pstate_ut_check_freq(u32 - (cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) && - (cpudata->lowest_nonlinear_freq >= policy->cpuinfo.min_freq) && - (policy->cpuinfo.min_freq > 0))) { -- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n", - __func__, cpu, policy->cpuinfo.max_freq, cpudata->nominal_freq, - cpudata->lowest_nonlinear_freq, policy->cpuinfo.min_freq); -- return; -+ return -EINVAL; - } - - if (cpudata->lowest_nonlinear_freq != policy->min) { -- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s cpu%d cpudata_lowest_nonlinear_freq=%d policy_min=%d, they should be equal!\n", - __func__, cpu, cpudata->lowest_nonlinear_freq, policy->min); -- return; -+ return -EINVAL; - } - - if (cpudata->boost_supported) { -- if ((policy->max == policy->cpuinfo.max_freq) || -- (policy->max == cpudata->nominal_freq)) -- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; -- else { -- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; -+ if ((policy->max != policy->cpuinfo.max_freq) && -+ (policy->max != cpudata->nominal_freq)) { - pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n", - __func__, cpu, policy->max, policy->cpuinfo.max_freq, - cpudata->nominal_freq); -- return; -+ return -EINVAL; - } - } else { -- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s cpu%d must support boost!\n", __func__, cpu); -- return; -+ return -EINVAL; - } - } - -- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; -+ return 0; - } - - static int amd_pstate_set_mode(enum amd_pstate_mode mode) -@@ -263,32 +238,28 @@ static int amd_pstate_set_mode(enum amd_ - return amd_pstate_update_status(mode_str, strlen(mode_str)); - } - --static void amd_pstate_ut_check_driver(u32 index) -+static int amd_pstate_ut_check_driver(u32 index) - { - enum amd_pstate_mode mode1, mode2 = AMD_PSTATE_DISABLE; -- int ret; - - for (mode1 = AMD_PSTATE_DISABLE; mode1 < AMD_PSTATE_MAX; mode1++) { -- ret = amd_pstate_set_mode(mode1); -+ int ret = amd_pstate_set_mode(mode1); - if (ret) -- goto out; -+ return ret; - for (mode2 = AMD_PSTATE_DISABLE; mode2 < AMD_PSTATE_MAX; mode2++) { - if (mode1 == mode2) - continue; - ret = amd_pstate_set_mode(mode2); -- if (ret) -- goto out; -+ if (ret) { -+ pr_err("%s: failed to update status for %s->%s\n", __func__, -+ amd_pstate_get_mode_string(mode1), -+ amd_pstate_get_mode_string(mode2)); -+ return ret; -+ } - } - } --out: -- if (ret) -- pr_warn("%s: failed to update status for %s->%s: %d\n", __func__, -- amd_pstate_get_mode_string(mode1), -- amd_pstate_get_mode_string(mode2), ret); -- -- amd_pstate_ut_cases[index].result = ret ? 
-- AMD_PSTATE_UT_RESULT_FAIL : -- AMD_PSTATE_UT_RESULT_PASS; -+ -+ return 0; - } - - static int __init amd_pstate_ut_init(void) -@@ -296,16 +267,12 @@ static int __init amd_pstate_ut_init(voi - u32 i = 0, arr_size = ARRAY_SIZE(amd_pstate_ut_cases); - - for (i = 0; i < arr_size; i++) { -- amd_pstate_ut_cases[i].func(i); -- switch (amd_pstate_ut_cases[i].result) { -- case AMD_PSTATE_UT_RESULT_PASS: -+ int ret = amd_pstate_ut_cases[i].func(i); -+ -+ if (ret) -+ pr_err("%-4d %-20s\t fail: %d!\n", i+1, amd_pstate_ut_cases[i].name, ret); -+ else - pr_info("%-4d %-20s\t success!\n", i+1, amd_pstate_ut_cases[i].name); -- break; -- case AMD_PSTATE_UT_RESULT_FAIL: -- default: -- pr_info("%-4d %-20s\t fail!\n", i+1, amd_pstate_ut_cases[i].name); -- break; -- } - } - - return 0; diff --git a/debian/patches/patchset-pf/amd-pstate/0015-cpufreq-amd-pstate-ut-Run-on-all-of-the-correct-CPUs.patch b/debian/patches/patchset-pf/amd-pstate/0015-cpufreq-amd-pstate-ut-Run-on-all-of-the-correct-CPUs.patch deleted file mode 100644 index 39b033f..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0015-cpufreq-amd-pstate-ut-Run-on-all-of-the-correct-CPUs.patch +++ /dev/null @@ -1,50 +0,0 @@ -From c553e0165997349a3f831fa04bdd7f61913a3442 Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Wed, 26 Feb 2025 01:49:25 -0600 -Subject: cpufreq/amd-pstate-ut: Run on all of the correct CPUs - -If a CPU is missing a policy or one has been offlined then the unit test -is skipped for the rest of the CPUs on the system. - -Instead; iterate online CPUs and skip any missing policies to allow -continuing to test the rest of them. - -Reviewed-by: Gautham R. Shenoy -Reviewed-by: Dhananjay Ugwekar -Signed-off-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate-ut.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - ---- a/drivers/cpufreq/amd-pstate-ut.c -+++ b/drivers/cpufreq/amd-pstate-ut.c -@@ -116,12 +116,12 @@ static int amd_pstate_ut_check_perf(u32 - struct amd_cpudata *cpudata = NULL; - union perf_cached cur_perf; - -- for_each_possible_cpu(cpu) { -+ for_each_online_cpu(cpu) { - struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL; - - policy = cpufreq_cpu_get(cpu); - if (!policy) -- break; -+ continue; - cpudata = policy->driver_data; - - if (get_shared_mem()) { -@@ -188,12 +188,12 @@ static int amd_pstate_ut_check_freq(u32 - int cpu = 0; - struct amd_cpudata *cpudata = NULL; - -- for_each_possible_cpu(cpu) { -+ for_each_online_cpu(cpu) { - struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL; - - policy = cpufreq_cpu_get(cpu); - if (!policy) -- break; -+ continue; - cpudata = policy->driver_data; - - if (!((policy->cpuinfo.max_freq >= cpudata->nominal_freq) && diff --git a/debian/patches/patchset-pf/amd-pstate/0016-cpufreq-amd-pstate-ut-Adjust-variable-scope.patch b/debian/patches/patchset-pf/amd-pstate/0016-cpufreq-amd-pstate-ut-Adjust-variable-scope.patch deleted file mode 100644 index 214c5a1..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0016-cpufreq-amd-pstate-ut-Adjust-variable-scope.patch +++ /dev/null @@ -1,42 +0,0 @@ -From c4197fd693cb98a8a71557187a7cf592d6b68b3c Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Wed, 26 Feb 2025 01:49:26 -0600 -Subject: cpufreq/amd-pstate-ut: Adjust variable scope - -In amd_pstate_ut_check_freq() and amd_pstate_ut_check_perf() the cpudata -variable is only needed in the scope of the for loop. Move it there. - -Reviewed-by: Gautham R. 
Shenoy -Reviewed-by: Dhananjay Ugwekar -Signed-off-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate-ut.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/drivers/cpufreq/amd-pstate-ut.c -+++ b/drivers/cpufreq/amd-pstate-ut.c -@@ -113,11 +113,11 @@ static int amd_pstate_ut_check_perf(u32 - u32 highest_perf = 0, nominal_perf = 0, lowest_nonlinear_perf = 0, lowest_perf = 0; - u64 cap1 = 0; - struct cppc_perf_caps cppc_perf; -- struct amd_cpudata *cpudata = NULL; - union perf_cached cur_perf; - - for_each_online_cpu(cpu) { - struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL; -+ struct amd_cpudata *cpudata; - - policy = cpufreq_cpu_get(cpu); - if (!policy) -@@ -186,10 +186,10 @@ static int amd_pstate_ut_check_perf(u32 - static int amd_pstate_ut_check_freq(u32 index) - { - int cpu = 0; -- struct amd_cpudata *cpudata = NULL; - - for_each_online_cpu(cpu) { - struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL; -+ struct amd_cpudata *cpudata; - - policy = cpufreq_cpu_get(cpu); - if (!policy) diff --git a/debian/patches/patchset-pf/amd-pstate/0017-cpufreq-amd-pstate-Replace-all-AMD_CPPC_-macros-with.patch b/debian/patches/patchset-pf/amd-pstate/0017-cpufreq-amd-pstate-Replace-all-AMD_CPPC_-macros-with.patch deleted file mode 100644 index 6bdc28c..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0017-cpufreq-amd-pstate-Replace-all-AMD_CPPC_-macros-with.patch +++ /dev/null @@ -1,123 +0,0 @@ -From 19c375251767f49b62894d3b4782f0b8b01313b8 Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Wed, 26 Feb 2025 01:49:27 -0600 -Subject: cpufreq/amd-pstate: Replace all AMD_CPPC_* macros with masks - -Bitfield masks are easier to follow and less error prone. - -Reviewed-by: Dhananjay Ugwekar -Reviewed-by: Gautham R. Shenoy -Signed-off-by: Mario Limonciello ---- - arch/x86/include/asm/msr-index.h | 20 +++++++++++--------- - arch/x86/kernel/acpi/cppc.c | 4 +++- - drivers/cpufreq/amd-pstate-ut.c | 9 +++++---- - drivers/cpufreq/amd-pstate.c | 16 ++++++---------- - 4 files changed, 25 insertions(+), 24 deletions(-) - ---- a/arch/x86/include/asm/msr-index.h -+++ b/arch/x86/include/asm/msr-index.h -@@ -709,15 +709,17 @@ - #define MSR_AMD_CPPC_REQ 0xc00102b3 - #define MSR_AMD_CPPC_STATUS 0xc00102b4 - --#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff) --#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff) --#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff) --#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff) -+/* Masks for use with MSR_AMD_CPPC_CAP1 */ -+#define AMD_CPPC_LOWEST_PERF_MASK GENMASK(7, 0) -+#define AMD_CPPC_LOWNONLIN_PERF_MASK GENMASK(15, 8) -+#define AMD_CPPC_NOMINAL_PERF_MASK GENMASK(23, 16) -+#define AMD_CPPC_HIGHEST_PERF_MASK GENMASK(31, 24) - --#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0) --#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8) --#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16) --#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24) -+/* Masks for use with MSR_AMD_CPPC_REQ */ -+#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0) -+#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8) -+#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16) -+#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24) - - /* AMD Performance Counter Global Status and Control MSRs */ - #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 ---- a/arch/x86/kernel/acpi/cppc.c -+++ b/arch/x86/kernel/acpi/cppc.c -@@ -4,6 +4,8 @@ - * Copyright (c) 2016, Intel Corporation. 
- */ - -+#include -+ - #include - #include - #include -@@ -149,7 +151,7 @@ int amd_get_highest_perf(unsigned int cp - if (ret) - goto out; - -- val = AMD_CPPC_HIGHEST_PERF(val); -+ val = FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, val); - } else { - ret = cppc_get_highest_perf(cpu, &val); - if (ret) ---- a/drivers/cpufreq/amd-pstate-ut.c -+++ b/drivers/cpufreq/amd-pstate-ut.c -@@ -22,6 +22,7 @@ - - #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -+#include - #include - #include - #include -@@ -142,10 +143,10 @@ static int amd_pstate_ut_check_perf(u32 - return ret; - } - -- highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); -- nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1); -- lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1); -- lowest_perf = AMD_CPPC_LOWEST_PERF(cap1); -+ highest_perf = FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, cap1); -+ nominal_perf = FIELD_GET(AMD_CPPC_NOMINAL_PERF_MASK, cap1); -+ lowest_nonlinear_perf = FIELD_GET(AMD_CPPC_LOWNONLIN_PERF_MASK, cap1); -+ lowest_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1); - } - - cur_perf = READ_ONCE(cpudata->perf); ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -89,11 +89,6 @@ static bool cppc_enabled; - static bool amd_pstate_prefcore = true; - static struct quirk_entry *quirks; - --#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0) --#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8) --#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16) --#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24) -- - /* - * AMD Energy Preference Performance (EPP) - * The EPP is used in the CCLK DPM controller to drive -@@ -439,12 +434,13 @@ static int msr_init_perf(struct amd_cpud - - perf.highest_perf = numerator; - perf.max_limit_perf = numerator; -- perf.min_limit_perf = AMD_CPPC_LOWEST_PERF(cap1); -- perf.nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1); -- perf.lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1); -- perf.lowest_perf = AMD_CPPC_LOWEST_PERF(cap1); -+ perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1); -+ perf.nominal_perf = FIELD_GET(AMD_CPPC_NOMINAL_PERF_MASK, cap1); -+ perf.lowest_nonlinear_perf = FIELD_GET(AMD_CPPC_LOWNONLIN_PERF_MASK, cap1); -+ perf.lowest_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1); - WRITE_ONCE(cpudata->perf, perf); -- WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1)); -+ WRITE_ONCE(cpudata->prefcore_ranking, FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, cap1)); -+ - return 0; - } - diff --git a/debian/patches/patchset-pf/amd-pstate/0018-cpufreq-amd-pstate-Cache-CPPC-request-in-shared-mem-.patch b/debian/patches/patchset-pf/amd-pstate/0018-cpufreq-amd-pstate-Cache-CPPC-request-in-shared-mem-.patch deleted file mode 100644 index 51e5c92..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0018-cpufreq-amd-pstate-Cache-CPPC-request-in-shared-mem-.patch +++ /dev/null @@ -1,60 +0,0 @@ -From bb7fadf4a86e19b52cbe850c9274bfa643d3ce52 Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Wed, 26 Feb 2025 01:49:28 -0600 -Subject: cpufreq/amd-pstate: Cache CPPC request in shared mem case too - -In order to prevent a potential write for shmem_update_perf() -cache the request into the cppc_req_cached variable normally only -used for the MSR case. - -This adds symmetry into the code and potentially avoids extra writes. - -Reviewed-by: Dhananjay Ugwekar -Reviewed-by: Gautham R. 
Shenoy -Signed-off-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate.c | 22 +++++++++++++++++++++- - 1 file changed, 21 insertions(+), 1 deletion(-) - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -496,6 +496,8 @@ static int shmem_update_perf(struct amd_ - u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) - { - struct cppc_perf_ctrls perf_ctrls; -+ u64 value, prev; -+ int ret; - - if (cppc_state == AMD_PSTATE_ACTIVE) { - int ret = shmem_set_epp(cpudata, epp); -@@ -504,11 +506,29 @@ static int shmem_update_perf(struct amd_ - return ret; - } - -+ value = prev = READ_ONCE(cpudata->cppc_req_cached); -+ -+ value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | -+ AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK); -+ value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); -+ value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); -+ value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); -+ value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); -+ -+ if (value == prev) -+ return 0; -+ - perf_ctrls.max_perf = max_perf; - perf_ctrls.min_perf = min_perf; - perf_ctrls.desired_perf = des_perf; - -- return cppc_set_perf(cpudata->cpu, &perf_ctrls); -+ ret = cppc_set_perf(cpudata->cpu, &perf_ctrls); -+ if (ret) -+ return ret; -+ -+ WRITE_ONCE(cpudata->cppc_req_cached, value); -+ -+ return 0; - } - - static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) diff --git a/debian/patches/patchset-pf/amd-pstate/0019-cpufreq-amd-pstate-Move-all-EPP-tracing-into-_update.patch b/debian/patches/patchset-pf/amd-pstate/0019-cpufreq-amd-pstate-Move-all-EPP-tracing-into-_update.patch deleted file mode 100644 index 044fcea..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0019-cpufreq-amd-pstate-Move-all-EPP-tracing-into-_update.patch +++ /dev/null @@ -1,318 +0,0 @@ -From e02f8a14d44223160d348d5841cc3dd916a14401 Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Wed, 26 Feb 2025 01:49:29 -0600 -Subject: cpufreq/amd-pstate: Move all EPP tracing into *_update_perf and - *_set_epp functions - -The EPP tracing is done by the caller today, but this precludes the -information about whether the CPPC request has changed. - -Move it into the update_perf and set_epp functions and include information -about whether the request has changed from the last one. -amd_pstate_update_perf() and amd_pstate_set_epp() now require the policy -as an argument instead of the cpudata. - -Reviewed-by: Dhananjay Ugwekar -Reviewed-by: Gautham R. 
Shenoy -Signed-off-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate-trace.h | 13 +++- - drivers/cpufreq/amd-pstate.c | 118 +++++++++++++++++------------ - 2 files changed, 80 insertions(+), 51 deletions(-) - ---- a/drivers/cpufreq/amd-pstate-trace.h -+++ b/drivers/cpufreq/amd-pstate-trace.h -@@ -90,7 +90,8 @@ TRACE_EVENT(amd_pstate_epp_perf, - u8 epp, - u8 min_perf, - u8 max_perf, -- bool boost -+ bool boost, -+ bool changed - ), - - TP_ARGS(cpu_id, -@@ -98,7 +99,8 @@ TRACE_EVENT(amd_pstate_epp_perf, - epp, - min_perf, - max_perf, -- boost), -+ boost, -+ changed), - - TP_STRUCT__entry( - __field(unsigned int, cpu_id) -@@ -107,6 +109,7 @@ TRACE_EVENT(amd_pstate_epp_perf, - __field(u8, min_perf) - __field(u8, max_perf) - __field(bool, boost) -+ __field(bool, changed) - ), - - TP_fast_assign( -@@ -116,15 +119,17 @@ TRACE_EVENT(amd_pstate_epp_perf, - __entry->min_perf = min_perf; - __entry->max_perf = max_perf; - __entry->boost = boost; -+ __entry->changed = changed; - ), - -- TP_printk("cpu%u: [%hhu<->%hhu]/%hhu, epp=%hhu, boost=%u", -+ TP_printk("cpu%u: [%hhu<->%hhu]/%hhu, epp=%hhu, boost=%u, changed=%u", - (unsigned int)__entry->cpu_id, - (u8)__entry->min_perf, - (u8)__entry->max_perf, - (u8)__entry->highest_perf, - (u8)__entry->epp, -- (bool)__entry->boost -+ (bool)__entry->boost, -+ (bool)__entry->changed - ) - ); - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -228,9 +228,10 @@ static u8 shmem_get_epp(struct amd_cpuda - return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, epp); - } - --static int msr_update_perf(struct amd_cpudata *cpudata, u8 min_perf, -+static int msr_update_perf(struct cpufreq_policy *policy, u8 min_perf, - u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) - { -+ struct amd_cpudata *cpudata = policy->driver_data; - u64 value, prev; - - value = prev = READ_ONCE(cpudata->cppc_req_cached); -@@ -242,6 +243,18 @@ static int msr_update_perf(struct amd_cp - value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); - value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); - -+ if (trace_amd_pstate_epp_perf_enabled()) { -+ union perf_cached perf = READ_ONCE(cpudata->perf); -+ -+ trace_amd_pstate_epp_perf(cpudata->cpu, -+ perf.highest_perf, -+ epp, -+ min_perf, -+ max_perf, -+ policy->boost_enabled, -+ value != prev); -+ } -+ - if (value == prev) - return 0; - -@@ -256,24 +269,26 @@ static int msr_update_perf(struct amd_cp - } - - WRITE_ONCE(cpudata->cppc_req_cached, value); -- WRITE_ONCE(cpudata->epp_cached, epp); -+ if (epp != cpudata->epp_cached) -+ WRITE_ONCE(cpudata->epp_cached, epp); - - return 0; - } - - DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf); - --static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata, -+static inline int amd_pstate_update_perf(struct cpufreq_policy *policy, - u8 min_perf, u8 des_perf, - u8 max_perf, u8 epp, - bool fast_switch) - { -- return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, -+ return static_call(amd_pstate_update_perf)(policy, min_perf, des_perf, - max_perf, epp, fast_switch); - } - --static int msr_set_epp(struct amd_cpudata *cpudata, u8 epp) -+static int msr_set_epp(struct cpufreq_policy *policy, u8 epp) - { -+ struct amd_cpudata *cpudata = policy->driver_data; - u64 value, prev; - int ret; - -@@ -281,6 +296,19 @@ static int msr_set_epp(struct amd_cpudat - value &= ~AMD_CPPC_EPP_PERF_MASK; - value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); - -+ if (trace_amd_pstate_epp_perf_enabled()) { -+ union perf_cached perf = cpudata->perf; -+ -+ 
trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, -+ epp, -+ FIELD_GET(AMD_CPPC_MIN_PERF_MASK, -+ cpudata->cppc_req_cached), -+ FIELD_GET(AMD_CPPC_MAX_PERF_MASK, -+ cpudata->cppc_req_cached), -+ policy->boost_enabled, -+ value != prev); -+ } -+ - if (value == prev) - return 0; - -@@ -299,15 +327,29 @@ static int msr_set_epp(struct amd_cpudat - - DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp); - --static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u8 epp) -+static inline int amd_pstate_set_epp(struct cpufreq_policy *policy, u8 epp) - { -- return static_call(amd_pstate_set_epp)(cpudata, epp); -+ return static_call(amd_pstate_set_epp)(policy, epp); - } - --static int shmem_set_epp(struct amd_cpudata *cpudata, u8 epp) -+static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) - { -- int ret; -+ struct amd_cpudata *cpudata = policy->driver_data; - struct cppc_perf_ctrls perf_ctrls; -+ int ret; -+ -+ if (trace_amd_pstate_epp_perf_enabled()) { -+ union perf_cached perf = cpudata->perf; -+ -+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, -+ epp, -+ FIELD_GET(AMD_CPPC_MIN_PERF_MASK, -+ cpudata->cppc_req_cached), -+ FIELD_GET(AMD_CPPC_MAX_PERF_MASK, -+ cpudata->cppc_req_cached), -+ policy->boost_enabled, -+ epp != cpudata->epp_cached); -+ } - - if (epp == cpudata->epp_cached) - return 0; -@@ -339,17 +381,7 @@ static int amd_pstate_set_energy_pref_in - return -EBUSY; - } - -- if (trace_amd_pstate_epp_perf_enabled()) { -- union perf_cached perf = READ_ONCE(cpudata->perf); -- -- trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, -- epp, -- FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), -- FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), -- policy->boost_enabled); -- } -- -- return amd_pstate_set_epp(cpudata, epp); -+ return amd_pstate_set_epp(policy, epp); - } - - static inline int msr_cppc_enable(bool enable) -@@ -492,15 +524,16 @@ static inline int amd_pstate_init_perf(s - return static_call(amd_pstate_init_perf)(cpudata); - } - --static int shmem_update_perf(struct amd_cpudata *cpudata, u8 min_perf, -+static int shmem_update_perf(struct cpufreq_policy *policy, u8 min_perf, - u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) - { -+ struct amd_cpudata *cpudata = policy->driver_data; - struct cppc_perf_ctrls perf_ctrls; - u64 value, prev; - int ret; - - if (cppc_state == AMD_PSTATE_ACTIVE) { -- int ret = shmem_set_epp(cpudata, epp); -+ int ret = shmem_set_epp(policy, epp); - - if (ret) - return ret; -@@ -515,6 +548,18 @@ static int shmem_update_perf(struct amd_ - value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); - value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); - -+ if (trace_amd_pstate_epp_perf_enabled()) { -+ union perf_cached perf = READ_ONCE(cpudata->perf); -+ -+ trace_amd_pstate_epp_perf(cpudata->cpu, -+ perf.highest_perf, -+ epp, -+ min_perf, -+ max_perf, -+ policy->boost_enabled, -+ value != prev); -+ } -+ - if (value == prev) - return 0; - -@@ -592,7 +637,7 @@ static void amd_pstate_update(struct amd - cpudata->cpu, fast_switch); - } - -- amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch); -+ amd_pstate_update_perf(policy, min_perf, des_perf, max_perf, 0, fast_switch); - } - - static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) -@@ -1525,7 +1570,7 @@ static int amd_pstate_epp_cpu_init(struc - return ret; - WRITE_ONCE(cpudata->cppc_req_cached, value); - } -- ret = amd_pstate_set_epp(cpudata, cpudata->epp_default); -+ ret = amd_pstate_set_epp(policy, 
cpudata->epp_default); - if (ret) - return ret; - -@@ -1566,14 +1611,8 @@ static int amd_pstate_epp_update_limit(s - epp = READ_ONCE(cpudata->epp_cached); - - perf = READ_ONCE(cpudata->perf); -- if (trace_amd_pstate_epp_perf_enabled()) { -- trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, epp, -- perf.min_limit_perf, -- perf.max_limit_perf, -- policy->boost_enabled); -- } - -- return amd_pstate_update_perf(cpudata, perf.min_limit_perf, 0U, -+ return amd_pstate_update_perf(policy, perf.min_limit_perf, 0U, - perf.max_limit_perf, epp, false); - } - -@@ -1605,20 +1644,12 @@ static int amd_pstate_epp_set_policy(str - - static int amd_pstate_epp_reenable(struct cpufreq_policy *policy) - { -- struct amd_cpudata *cpudata = policy->driver_data; -- union perf_cached perf = READ_ONCE(cpudata->perf); - int ret; - - ret = amd_pstate_cppc_enable(true); - if (ret) - pr_err("failed to enable amd pstate during resume, return %d\n", ret); - -- if (trace_amd_pstate_epp_perf_enabled()) { -- trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, -- cpudata->epp_cached, -- FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), -- perf.highest_perf, policy->boost_enabled); -- } - - return amd_pstate_epp_update_limit(policy); - } -@@ -1646,14 +1677,7 @@ static int amd_pstate_epp_cpu_offline(st - if (cpudata->suspended) - return 0; - -- if (trace_amd_pstate_epp_perf_enabled()) { -- trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, -- AMD_CPPC_EPP_BALANCE_POWERSAVE, -- perf.lowest_perf, perf.lowest_perf, -- policy->boost_enabled); -- } -- -- return amd_pstate_update_perf(cpudata, perf.lowest_perf, 0, perf.lowest_perf, -+ return amd_pstate_update_perf(policy, perf.lowest_perf, 0, perf.lowest_perf, - AMD_CPPC_EPP_BALANCE_POWERSAVE, false); - } - diff --git a/debian/patches/patchset-pf/amd-pstate/0020-cpufreq-amd-pstate-Update-cppc_req_cached-for-shared.patch b/debian/patches/patchset-pf/amd-pstate/0020-cpufreq-amd-pstate-Update-cppc_req_cached-for-shared.patch deleted file mode 100644 index 4b70863..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0020-cpufreq-amd-pstate-Update-cppc_req_cached-for-shared.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 5f0b3bf5497422293576a0783e47d203c52ed863 Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Wed, 26 Feb 2025 01:49:30 -0600 -Subject: cpufreq/amd-pstate: Update cppc_req_cached for shared mem EPP writes - -On EPP only writes update the cached variable so that the min/max -performance controls don't need to be updated again. - -Reviewed-by: Dhananjay Ugwekar -Reviewed-by: Gautham R. 
Shenoy -Signed-off-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate.c | 6 ++++++ - 1 file changed, 6 insertions(+) - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -336,6 +336,7 @@ static int shmem_set_epp(struct cpufreq_ - { - struct amd_cpudata *cpudata = policy->driver_data; - struct cppc_perf_ctrls perf_ctrls; -+ u64 value; - int ret; - - if (trace_amd_pstate_epp_perf_enabled()) { -@@ -362,6 +363,11 @@ static int shmem_set_epp(struct cpufreq_ - } - WRITE_ONCE(cpudata->epp_cached, epp); - -+ value = READ_ONCE(cpudata->cppc_req_cached); -+ value &= ~AMD_CPPC_EPP_PERF_MASK; -+ value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); -+ WRITE_ONCE(cpudata->cppc_req_cached, value); -+ - return ret; - } - diff --git a/debian/patches/patchset-pf/amd-pstate/0021-cpufreq-amd-pstate-Drop-debug-statements-for-policy-.patch b/debian/patches/patchset-pf/amd-pstate/0021-cpufreq-amd-pstate-Drop-debug-statements-for-policy-.patch deleted file mode 100644 index fd40203..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0021-cpufreq-amd-pstate-Drop-debug-statements-for-policy-.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 6c2201fe880d7d35fbde67d74ec1989f053cc0bd Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Wed, 26 Feb 2025 01:49:31 -0600 -Subject: cpufreq/amd-pstate: Drop debug statements for policy setting - -There are trace events that exist now for all amd-pstate modes that -will output information right before programming to the hardware. - -This makes the existing debug statements unnecessary remaining -overhead. Drop them. - -Reviewed-by: Dhananjay Ugwekar -Reviewed-by: Gautham R. Shenoy -Signed-off-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate.c | 4 ---- - 1 file changed, 4 deletions(-) - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -667,7 +667,6 @@ static int amd_pstate_verify(struct cpuf - } - - cpufreq_verify_within_cpu_limits(policy_data); -- pr_debug("policy_max =%d, policy_min=%d\n", policy_data->max, policy_data->min); - - return 0; - } -@@ -1630,9 +1629,6 @@ static int amd_pstate_epp_set_policy(str - if (!policy->cpuinfo.max_freq) - return -ENODEV; - -- pr_debug("set_policy: cpuinfo.max %u policy->max %u\n", -- policy->cpuinfo.max_freq, policy->max); -- - cpudata->policy = policy->policy; - - ret = amd_pstate_epp_update_limit(policy); diff --git a/debian/patches/patchset-pf/amd-pstate/0022-cpufreq-amd-pstate-Rework-CPPC-enabling.patch b/debian/patches/patchset-pf/amd-pstate/0022-cpufreq-amd-pstate-Rework-CPPC-enabling.patch deleted file mode 100644 index a816053..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0022-cpufreq-amd-pstate-Rework-CPPC-enabling.patch +++ /dev/null @@ -1,327 +0,0 @@ -From 3c5030a27361deff20bec5d43339109901f3198c Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Wed, 26 Feb 2025 01:49:32 -0600 -Subject: cpufreq/amd-pstate: Rework CPPC enabling - -The CPPC enable register is configured as "write once". That is -any future writes don't actually do anything. - -Because of this, all the cleanup paths that currently exist for -CPPC disable are non-effective. - -Rework CPPC enable to only enable after all the CAP registers have -been read to avoid enabling CPPC on CPUs with invalid _CPC or -unpopulated MSRs. - -As the register is write once, remove all cleanup paths as well. 
- -Signed-off-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate.c | 179 +++++++---------------------------- - 1 file changed, 35 insertions(+), 144 deletions(-) - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -85,7 +85,6 @@ static struct cpufreq_driver *current_ps - static struct cpufreq_driver amd_pstate_driver; - static struct cpufreq_driver amd_pstate_epp_driver; - static int cppc_state = AMD_PSTATE_UNDEFINED; --static bool cppc_enabled; - static bool amd_pstate_prefcore = true; - static struct quirk_entry *quirks; - -@@ -371,89 +370,21 @@ static int shmem_set_epp(struct cpufreq_ - return ret; - } - --static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy, -- int pref_index) -+static inline int msr_cppc_enable(struct cpufreq_policy *policy) - { -- struct amd_cpudata *cpudata = policy->driver_data; -- u8 epp; -- -- if (!pref_index) -- epp = cpudata->epp_default; -- else -- epp = epp_values[pref_index]; -- -- if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { -- pr_debug("EPP cannot be set under performance policy\n"); -- return -EBUSY; -- } -- -- return amd_pstate_set_epp(policy, epp); --} -- --static inline int msr_cppc_enable(bool enable) --{ -- int ret, cpu; -- unsigned long logical_proc_id_mask = 0; -- -- /* -- * MSR_AMD_CPPC_ENABLE is write-once, once set it cannot be cleared. -- */ -- if (!enable) -- return 0; -- -- if (enable == cppc_enabled) -- return 0; -- -- for_each_present_cpu(cpu) { -- unsigned long logical_id = topology_logical_package_id(cpu); -- -- if (test_bit(logical_id, &logical_proc_id_mask)) -- continue; -- -- set_bit(logical_id, &logical_proc_id_mask); -- -- ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE, -- enable); -- if (ret) -- return ret; -- } -- -- cppc_enabled = enable; -- return 0; -+ return wrmsrl_safe_on_cpu(policy->cpu, MSR_AMD_CPPC_ENABLE, 1); - } - --static int shmem_cppc_enable(bool enable) -+static int shmem_cppc_enable(struct cpufreq_policy *policy) - { -- int cpu, ret = 0; -- struct cppc_perf_ctrls perf_ctrls; -- -- if (enable == cppc_enabled) -- return 0; -- -- for_each_present_cpu(cpu) { -- ret = cppc_set_enable(cpu, enable); -- if (ret) -- return ret; -- -- /* Enable autonomous mode for EPP */ -- if (cppc_state == AMD_PSTATE_ACTIVE) { -- /* Set desired perf as zero to allow EPP firmware control */ -- perf_ctrls.desired_perf = 0; -- ret = cppc_set_perf(cpu, &perf_ctrls); -- if (ret) -- return ret; -- } -- } -- -- cppc_enabled = enable; -- return ret; -+ return cppc_set_enable(policy->cpu, 1); - } - - DEFINE_STATIC_CALL(amd_pstate_cppc_enable, msr_cppc_enable); - --static inline int amd_pstate_cppc_enable(bool enable) -+static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy) - { -- return static_call(amd_pstate_cppc_enable)(enable); -+ return static_call(amd_pstate_cppc_enable)(policy); - } - - static int msr_init_perf(struct amd_cpudata *cpudata) -@@ -1063,6 +994,10 @@ static int amd_pstate_cpu_init(struct cp - cpudata->nominal_freq, - perf.highest_perf); - -+ ret = amd_pstate_cppc_enable(policy); -+ if (ret) -+ goto free_cpudata1; -+ - policy->boost_enabled = READ_ONCE(cpudata->boost_supported); - - /* It will be updated by governor */ -@@ -1110,28 +1045,6 @@ static void amd_pstate_cpu_exit(struct c - kfree(cpudata); - } - --static int amd_pstate_cpu_resume(struct cpufreq_policy *policy) --{ -- int ret; -- -- ret = amd_pstate_cppc_enable(true); -- if (ret) -- pr_err("failed to enable amd-pstate during resume, return %d\n", ret); -- -- return ret; --} -- 
--static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy) --{ -- int ret; -- -- ret = amd_pstate_cppc_enable(false); -- if (ret) -- pr_err("failed to disable amd-pstate during suspend, return %d\n", ret); -- -- return ret; --} -- - /* Sysfs attributes */ - - /* -@@ -1223,8 +1136,10 @@ static ssize_t show_energy_performance_a - static ssize_t store_energy_performance_preference( - struct cpufreq_policy *policy, const char *buf, size_t count) - { -+ struct amd_cpudata *cpudata = policy->driver_data; - char str_preference[21]; - ssize_t ret; -+ u8 epp; - - ret = sscanf(buf, "%20s", str_preference); - if (ret != 1) -@@ -1234,7 +1149,17 @@ static ssize_t store_energy_performance_ - if (ret < 0) - return -EINVAL; - -- ret = amd_pstate_set_energy_pref_index(policy, ret); -+ if (!ret) -+ epp = cpudata->epp_default; -+ else -+ epp = epp_values[ret]; -+ -+ if (epp > 0 && policy->policy == CPUFREQ_POLICY_PERFORMANCE) { -+ pr_debug("EPP cannot be set under performance policy\n"); -+ return -EBUSY; -+ } -+ -+ ret = amd_pstate_set_epp(policy, epp); - - return ret ? ret : count; - } -@@ -1267,7 +1192,6 @@ static ssize_t show_energy_performance_p - - static void amd_pstate_driver_cleanup(void) - { -- amd_pstate_cppc_enable(false); - cppc_state = AMD_PSTATE_DISABLE; - current_pstate_driver = NULL; - } -@@ -1301,14 +1225,6 @@ static int amd_pstate_register_driver(in - - cppc_state = mode; - -- ret = amd_pstate_cppc_enable(true); -- if (ret) { -- pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n", -- ret); -- amd_pstate_driver_cleanup(); -- return ret; -- } -- - /* at least one CPU supports CPB */ - current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB); - -@@ -1548,11 +1464,15 @@ static int amd_pstate_epp_cpu_init(struc - policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf, - cpudata->nominal_freq, - perf.highest_perf); -+ policy->driver_data = cpudata; -+ -+ ret = amd_pstate_cppc_enable(policy); -+ if (ret) -+ goto free_cpudata1; - - /* It will be updated by governor */ - policy->cur = policy->cpuinfo.min_freq; - -- policy->driver_data = cpudata; - - policy->boost_enabled = READ_ONCE(cpudata->boost_supported); - -@@ -1644,31 +1564,11 @@ static int amd_pstate_epp_set_policy(str - return 0; - } - --static int amd_pstate_epp_reenable(struct cpufreq_policy *policy) --{ -- int ret; -- -- ret = amd_pstate_cppc_enable(true); -- if (ret) -- pr_err("failed to enable amd pstate during resume, return %d\n", ret); -- -- -- return amd_pstate_epp_update_limit(policy); --} -- - static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) - { -- struct amd_cpudata *cpudata = policy->driver_data; -- int ret; -- -- pr_debug("AMD CPU Core %d going online\n", cpudata->cpu); -+ pr_debug("AMD CPU Core %d going online\n", policy->cpu); - -- ret = amd_pstate_epp_reenable(policy); -- if (ret) -- return ret; -- cpudata->suspended = false; -- -- return 0; -+ return amd_pstate_cppc_enable(policy); - } - - static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) -@@ -1686,11 +1586,6 @@ static int amd_pstate_epp_cpu_offline(st - static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) - { - struct amd_cpudata *cpudata = policy->driver_data; -- int ret; -- -- /* avoid suspending when EPP is not enabled */ -- if (cppc_state != AMD_PSTATE_ACTIVE) -- return 0; - - /* invalidate to ensure it's rewritten during resume */ - cpudata->cppc_req_cached = 0; -@@ -1698,11 +1593,6 @@ static int amd_pstate_epp_suspend(struct - /* set this flag to avoid 
setting core offline*/ - cpudata->suspended = true; - -- /* disable CPPC in lowlevel firmware */ -- ret = amd_pstate_cppc_enable(false); -- if (ret) -- pr_err("failed to suspend, return %d\n", ret); -- - return 0; - } - -@@ -1711,8 +1601,12 @@ static int amd_pstate_epp_resume(struct - struct amd_cpudata *cpudata = policy->driver_data; - - if (cpudata->suspended) { -+ int ret; -+ - /* enable amd pstate from suspend state*/ -- amd_pstate_epp_reenable(policy); -+ ret = amd_pstate_epp_update_limit(policy); -+ if (ret) -+ return ret; - - cpudata->suspended = false; - } -@@ -1727,8 +1621,6 @@ static struct cpufreq_driver amd_pstate_ - .fast_switch = amd_pstate_fast_switch, - .init = amd_pstate_cpu_init, - .exit = amd_pstate_cpu_exit, -- .suspend = amd_pstate_cpu_suspend, -- .resume = amd_pstate_cpu_resume, - .set_boost = amd_pstate_set_boost, - .update_limits = amd_pstate_update_limits, - .name = "amd-pstate", -@@ -1895,7 +1787,6 @@ static int __init amd_pstate_init(void) - - global_attr_free: - cpufreq_unregister_driver(current_pstate_driver); -- amd_pstate_cppc_enable(false); - return ret; - } - device_initcall(amd_pstate_init); diff --git a/debian/patches/patchset-pf/amd-pstate/0023-cpufreq-amd-pstate-Stop-caching-EPP.patch b/debian/patches/patchset-pf/amd-pstate/0023-cpufreq-amd-pstate-Stop-caching-EPP.patch deleted file mode 100644 index 4435df7..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0023-cpufreq-amd-pstate-Stop-caching-EPP.patch +++ /dev/null @@ -1,105 +0,0 @@ -From c06cca99a6d74e7a6d6f020dbf982b0b9bf704e6 Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Wed, 26 Feb 2025 01:49:33 -0600 -Subject: cpufreq/amd-pstate: Stop caching EPP - -EPP values are cached in the cpudata structure per CPU. This is needless -though because they are also cached in the CPPC request variable. - -Drop the separate cache for EPP values and always reference the CPPC -request variable when needed. - -Reviewed-by: Dhananjay Ugwekar -Reviewed-by: Gautham R. 
Shenoy -Signed-off-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate.c | 19 ++++++++++--------- - drivers/cpufreq/amd-pstate.h | 1 - - 2 files changed, 10 insertions(+), 10 deletions(-) - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -268,8 +268,6 @@ static int msr_update_perf(struct cpufre - } - - WRITE_ONCE(cpudata->cppc_req_cached, value); -- if (epp != cpudata->epp_cached) -- WRITE_ONCE(cpudata->epp_cached, epp); - - return 0; - } -@@ -318,7 +316,6 @@ static int msr_set_epp(struct cpufreq_po - } - - /* update both so that msr_update_perf() can effectively check */ -- WRITE_ONCE(cpudata->epp_cached, epp); - WRITE_ONCE(cpudata->cppc_req_cached, value); - - return ret; -@@ -335,9 +332,12 @@ static int shmem_set_epp(struct cpufreq_ - { - struct amd_cpudata *cpudata = policy->driver_data; - struct cppc_perf_ctrls perf_ctrls; -+ u8 epp_cached; - u64 value; - int ret; - -+ -+ epp_cached = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); - if (trace_amd_pstate_epp_perf_enabled()) { - union perf_cached perf = cpudata->perf; - -@@ -348,10 +348,10 @@ static int shmem_set_epp(struct cpufreq_ - FIELD_GET(AMD_CPPC_MAX_PERF_MASK, - cpudata->cppc_req_cached), - policy->boost_enabled, -- epp != cpudata->epp_cached); -+ epp != epp_cached); - } - -- if (epp == cpudata->epp_cached) -+ if (epp == epp_cached) - return 0; - - perf_ctrls.energy_perf = epp; -@@ -360,7 +360,6 @@ static int shmem_set_epp(struct cpufreq_ - pr_debug("failed to set energy perf value (%d)\n", ret); - return ret; - } -- WRITE_ONCE(cpudata->epp_cached, epp); - - value = READ_ONCE(cpudata->cppc_req_cached); - value &= ~AMD_CPPC_EPP_PERF_MASK; -@@ -1168,9 +1167,11 @@ static ssize_t show_energy_performance_p - struct cpufreq_policy *policy, char *buf) - { - struct amd_cpudata *cpudata = policy->driver_data; -- u8 preference; -+ u8 preference, epp; -+ -+ epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); - -- switch (cpudata->epp_cached) { -+ switch (epp) { - case AMD_CPPC_EPP_PERFORMANCE: - preference = EPP_INDEX_PERFORMANCE; - break; -@@ -1533,7 +1534,7 @@ static int amd_pstate_epp_update_limit(s - if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) - epp = 0; - else -- epp = READ_ONCE(cpudata->epp_cached); -+ epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); - - perf = READ_ONCE(cpudata->perf); - ---- a/drivers/cpufreq/amd-pstate.h -+++ b/drivers/cpufreq/amd-pstate.h -@@ -102,7 +102,6 @@ struct amd_cpudata { - bool hw_prefcore; - - /* EPP feature related attributes*/ -- u8 epp_cached; - u32 policy; - bool suspended; - u8 epp_default; diff --git a/debian/patches/patchset-pf/amd-pstate/0024-cpufreq-amd-pstate-Drop-actions-in-amd_pstate_epp_cp.patch b/debian/patches/patchset-pf/amd-pstate/0024-cpufreq-amd-pstate-Drop-actions-in-amd_pstate_epp_cp.patch deleted file mode 100644 index 463b529..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0024-cpufreq-amd-pstate-Drop-actions-in-amd_pstate_epp_cp.patch +++ /dev/null @@ -1,39 +0,0 @@ -From a82e4f4eb6e5e9806c66285cb3cefde644b8ea6b Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Wed, 26 Feb 2025 01:49:34 -0600 -Subject: cpufreq/amd-pstate: Drop actions in amd_pstate_epp_cpu_offline() - -When the CPU goes offline there is no need to change the CPPC request -because the CPU will go into the deepest C-state it supports already. - -Actually changing the CPPC request when it goes offline messes up the -cached values and can lead to the wrong values being restored when -it comes back. 
- -Instead drop the actions and if the CPU comes back online let -amd_pstate_epp_set_policy() restore it to expected values. - -Reviewed-by: Dhananjay Ugwekar -Signed-off-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate.c | 9 +-------- - 1 file changed, 1 insertion(+), 8 deletions(-) - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -1574,14 +1574,7 @@ static int amd_pstate_epp_cpu_online(str - - static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) - { -- struct amd_cpudata *cpudata = policy->driver_data; -- union perf_cached perf = READ_ONCE(cpudata->perf); -- -- if (cpudata->suspended) -- return 0; -- -- return amd_pstate_update_perf(policy, perf.lowest_perf, 0, perf.lowest_perf, -- AMD_CPPC_EPP_BALANCE_POWERSAVE, false); -+ return 0; - } - - static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) diff --git a/debian/patches/patchset-pf/amd-pstate/0025-cpufreq-amd-pstate-fix-warning-noticed-by-kernel-tes.patch b/debian/patches/patchset-pf/amd-pstate/0025-cpufreq-amd-pstate-fix-warning-noticed-by-kernel-tes.patch deleted file mode 100644 index 325bb6a..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0025-cpufreq-amd-pstate-fix-warning-noticed-by-kernel-tes.patch +++ /dev/null @@ -1,41 +0,0 @@ -From de3dd387423b30565e846e0ff4424e2c99164030 Mon Sep 17 00:00:00 2001 -From: Mario Limonciello -Date: Thu, 27 Feb 2025 14:09:08 -0600 -Subject: cpufreq/amd-pstate: fix warning noticed by kernel test robot - -Reported-by: kernel test robot -Closes: https://lore.kernel.org/oe-kbuild-all/202502272001.nafS0qXq-lkp@intel.com/ -Signed-off-by: Oleksandr Natalenko ---- - drivers/cpufreq/amd-pstate.c | 13 ++++++------- - 1 file changed, 6 insertions(+), 7 deletions(-) - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -903,20 +903,19 @@ static int amd_pstate_init_freq(struct a - return ret; - perf = READ_ONCE(cpudata->perf); - -+ if (quirks && quirks->nominal_freq) -+ nominal_freq = quirks->nominal_freq; -+ else -+ nominal_freq = cppc_perf.nominal_freq; -+ nominal_freq *= 1000; -+ - if (quirks && quirks->lowest_freq) { - min_freq = quirks->lowest_freq; - perf.lowest_perf = freq_to_perf(perf, nominal_freq, min_freq); - WRITE_ONCE(cpudata->perf, perf); - } else - min_freq = cppc_perf.lowest_freq; -- -- if (quirks && quirks->nominal_freq) -- nominal_freq = quirks->nominal_freq; -- else -- nominal_freq = cppc_perf.nominal_freq; -- - min_freq *= 1000; -- nominal_freq *= 1000; - - WRITE_ONCE(cpudata->nominal_freq, nominal_freq); - diff --git a/debian/patches/patchset-pf/amd-pstate/0026-cpufreq-amd-pstate-Fix-min_limit-perf-and-freq-updat.patch b/debian/patches/patchset-pf/amd-pstate/0026-cpufreq-amd-pstate-Fix-min_limit-perf-and-freq-updat.patch deleted file mode 100644 index 99217a4..0000000 --- a/debian/patches/patchset-pf/amd-pstate/0026-cpufreq-amd-pstate-Fix-min_limit-perf-and-freq-updat.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 7e68278a4a90d52966b923404a2d280e3a83b66f Mon Sep 17 00:00:00 2001 -From: Dhananjay Ugwekar -Date: Mon, 7 Apr 2025 08:19:26 +0000 -Subject: cpufreq/amd-pstate: Fix min_limit perf and freq updation for - performance governor - -The min_limit perf and freq values can get disconnected with performance -governor, as we only modify the perf value in the special case. 
Fix that -by modifying the perf and freq values together - -Fixes: 009d1c29a451 ("cpufreq/amd-pstate: Move perf values into a union") -Signed-off-by: Dhananjay Ugwekar -Reviewed-by: Mario Limonciello -Link: https://lore.kernel.org/r/20250407081925.850473-1-dhananjay.ugwekar@amd.com -Signed-off-by: Mario Limonciello ---- - drivers/cpufreq/amd-pstate.c | 11 +++++++---- - 1 file changed, 7 insertions(+), 4 deletions(-) - ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -607,13 +607,16 @@ static void amd_pstate_update_min_max_li - union perf_cached perf = READ_ONCE(cpudata->perf); - - perf.max_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->max); -- perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min); -+ WRITE_ONCE(cpudata->max_limit_freq, policy->max); - -- if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) -+ if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { - perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf); -+ WRITE_ONCE(cpudata->min_limit_freq, min(cpudata->nominal_freq, cpudata->max_limit_freq)); -+ } else { -+ perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min); -+ WRITE_ONCE(cpudata->min_limit_freq, policy->min); -+ } - -- WRITE_ONCE(cpudata->max_limit_freq, policy->max); -- WRITE_ONCE(cpudata->min_limit_freq, policy->min); - WRITE_ONCE(cpudata->perf, perf); - } - diff --git a/debian/patches/patchset-pf/cpuidle/0001-cpuidle-Prefer-teo-over-menu-governor.patch b/debian/patches/patchset-pf/cpuidle/0001-cpuidle-Prefer-teo-over-menu-governor.patch index f863bb1..47f28aa 100644 --- a/debian/patches/patchset-pf/cpuidle/0001-cpuidle-Prefer-teo-over-menu-governor.patch +++ b/debian/patches/patchset-pf/cpuidle/0001-cpuidle-Prefer-teo-over-menu-governor.patch @@ -1,4 +1,4 @@ -From 247749c27f92a789d4f1727aa870167c25ca3c5e Mon Sep 17 00:00:00 2001 +From 1cb9f09cead0ba384729bfdc74d6fa21d586530c Mon Sep 17 00:00:00 2001 From: Christian Loehle Date: Thu, 5 Sep 2024 10:26:39 +0100 Subject: cpuidle: Prefer teo over menu governor @@ -36,7 +36,7 @@ Signed-off-by: Christian Loehle depends on KVM_GUEST --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c -@@ -519,7 +519,7 @@ static int menu_enable_device(struct cpu +@@ -513,7 +513,7 @@ static int menu_enable_device(struct cpu static struct cpuidle_governor menu_governor = { .name = "menu", diff --git a/debian/patches/patchset-pf/crypto/0001-crypto-x86-aes-xts-make-the-fast-path-64-bit-specifi.patch b/debian/patches/patchset-pf/crypto/0001-crypto-x86-aes-xts-make-the-fast-path-64-bit-specifi.patch deleted file mode 100644 index cdaf353..0000000 --- a/debian/patches/patchset-pf/crypto/0001-crypto-x86-aes-xts-make-the-fast-path-64-bit-specifi.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 5e5a835c50afc3b9bb2b8b9175d0924abb5a7f3c Mon Sep 17 00:00:00 2001 -From: Eric Biggers -Date: Mon, 27 Jan 2025 13:16:09 -0800 -Subject: crypto: x86/aes-xts - make the fast path 64-bit specific - -Remove 32-bit support from the fast path in xts_crypt(). Then optimize -it for 64-bit, and simplify the code, by switching to sg_virt() and -removing the now-unnecessary checks for crossing a page boundary. - -The result is simpler code that is slightly smaller and faster in the -case that actually matters (64-bit). 
- -Signed-off-by: Eric Biggers ---- - arch/x86/crypto/aesni-intel_glue.c | 30 ++++++++++-------------------- - 1 file changed, 10 insertions(+), 20 deletions(-) - ---- a/arch/x86/crypto/aesni-intel_glue.c -+++ b/arch/x86/crypto/aesni-intel_glue.c -@@ -581,11 +581,8 @@ xts_crypt(struct skcipher_request *req, - { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); -- const unsigned int cryptlen = req->cryptlen; -- struct scatterlist *src = req->src; -- struct scatterlist *dst = req->dst; - -- if (unlikely(cryptlen < AES_BLOCK_SIZE)) -+ if (unlikely(req->cryptlen < AES_BLOCK_SIZE)) - return -EINVAL; - - kernel_fpu_begin(); -@@ -593,23 +590,16 @@ xts_crypt(struct skcipher_request *req, - - /* - * In practice, virtually all XTS plaintexts and ciphertexts are either -- * 512 or 4096 bytes, aligned such that they don't span page boundaries. -- * To optimize the performance of these cases, and also any other case -- * where no page boundary is spanned, the below fast-path handles -- * single-page sources and destinations as efficiently as possible. -+ * 512 or 4096 bytes and do not use multiple scatterlist elements. To -+ * optimize the performance of these cases, the below fast-path handles -+ * single-scatterlist-element messages as efficiently as possible. The -+ * code is 64-bit specific, as it assumes no page mapping is needed. - */ -- if (likely(src->length >= cryptlen && dst->length >= cryptlen && -- src->offset + cryptlen <= PAGE_SIZE && -- dst->offset + cryptlen <= PAGE_SIZE)) { -- struct page *src_page = sg_page(src); -- struct page *dst_page = sg_page(dst); -- void *src_virt = kmap_local_page(src_page) + src->offset; -- void *dst_virt = kmap_local_page(dst_page) + dst->offset; -- -- (*crypt_func)(&ctx->crypt_ctx, src_virt, dst_virt, cryptlen, -- req->iv); -- kunmap_local(dst_virt); -- kunmap_local(src_virt); -+ if (IS_ENABLED(CONFIG_X86_64) && -+ likely(req->src->length >= req->cryptlen && -+ req->dst->length >= req->cryptlen)) { -+ (*crypt_func)(&ctx->crypt_ctx, sg_virt(req->src), -+ sg_virt(req->dst), req->cryptlen, req->iv); - kernel_fpu_end(); - return 0; - } diff --git a/debian/patches/patchset-pf/crypto/0002-crypto-x86-aes-ctr-rewrite-AESNI-AVX-optimized-CTR-a.patch b/debian/patches/patchset-pf/crypto/0002-crypto-x86-aes-ctr-rewrite-AESNI-AVX-optimized-CTR-a.patch deleted file mode 100644 index 82177b9..0000000 --- a/debian/patches/patchset-pf/crypto/0002-crypto-x86-aes-ctr-rewrite-AESNI-AVX-optimized-CTR-a.patch +++ /dev/null @@ -1,1857 +0,0 @@ -From 9564bcf085acd0bdea688cb6165302a6871a7c08 Mon Sep 17 00:00:00 2001 -From: Eric Biggers -Date: Mon, 10 Feb 2025 08:50:20 -0800 -Subject: crypto: x86/aes-ctr - rewrite AESNI+AVX optimized CTR and add VAES - support - -Delete aes_ctrby8_avx-x86_64.S and add a new assembly file -aes-ctr-avx-x86_64.S which follows a similar approach to -aes-xts-avx-x86_64.S in that it uses a "template" to provide AESNI+AVX, -VAES+AVX2, VAES+AVX10/256, and VAES+AVX10/512 code, instead of just -AESNI+AVX. Wire it up to the crypto API accordingly. - -This greatly improves the performance of AES-CTR and AES-XCTR on -VAES-capable CPUs, with the best case being AMD Zen 5 where an over 230% -increase in throughput is seen on long messages. Performance on -non-VAES-capable CPUs remains about the same, and the non-AVX AES-CTR -code (aesni_ctr_enc) is also kept as-is for now. 
There are some slight -regressions (less than 10%) on some short message lengths on some CPUs; -these are difficult to avoid, given how the previous code was so heavily -unrolled by message length, and they are not particularly important. -Detailed performance results are given in the tables below. - -Both CTR and XCTR support is retained. The main loop remains -8-vector-wide, which differs from the 4-vector-wide main loops that are -used in the XTS and GCM code. A wider loop is appropriate for CTR and -XCTR since they have fewer other instructions (such as vpclmulqdq) to -interleave with the AES instructions. - -Similar to what was the case for AES-GCM, the new assembly code also has -a much smaller binary size, as it fixes the excessive unrolling by data -length and key length present in the old code. Specifically, the new -assembly file compiles to about 9 KB of text vs. 28 KB for the old file. -This is despite 4x as many implementations being included. - -The tables below show the detailed performance results. The tables show -percentage improvement in single-threaded throughput for repeated -encryption of the given message length; an increase from 6000 MB/s to -12000 MB/s would be listed as 100%. They were collected by directly -measuring the Linux crypto API performance using a custom kernel module. -The tested CPUs were all server processors from Google Compute Engine -except for Zen 5 which was a Ryzen 9 9950X desktop processor. - -Table 1: AES-256-CTR throughput improvement, - CPU microarchitecture vs. message length in bytes: - - | 16384 | 4096 | 4095 | 1420 | 512 | 500 | ----------------------+-------+-------+-------+-------+-------+-------+ -AMD Zen 5 | 232% | 203% | 212% | 143% | 71% | 95% | -Intel Emerald Rapids | 116% | 116% | 117% | 91% | 78% | 79% | -Intel Ice Lake | 109% | 103% | 107% | 81% | 54% | 56% | -AMD Zen 4 | 109% | 91% | 100% | 70% | 43% | 59% | -AMD Zen 3 | 92% | 78% | 87% | 57% | 32% | 43% | -AMD Zen 2 | 9% | 8% | 14% | 12% | 8% | 21% | -Intel Skylake | 7% | 7% | 8% | 5% | 3% | 8% | - - | 300 | 200 | 64 | 63 | 16 | ----------------------+-------+-------+-------+-------+-------+ -AMD Zen 5 | 57% | 39% | -9% | 7% | -7% | -Intel Emerald Rapids | 37% | 42% | -0% | 13% | -8% | -Intel Ice Lake | 39% | 30% | -1% | 14% | -9% | -AMD Zen 4 | 42% | 38% | -0% | 18% | -3% | -AMD Zen 3 | 38% | 35% | 6% | 31% | 5% | -AMD Zen 2 | 24% | 23% | 5% | 30% | 3% | -Intel Skylake | 9% | 1% | -4% | 10% | -7% | - -Table 2: AES-256-XCTR throughput improvement, - CPU microarchitecture vs. 
message length in bytes: - - | 16384 | 4096 | 4095 | 1420 | 512 | 500 | ----------------------+-------+-------+-------+-------+-------+-------+ -AMD Zen 5 | 240% | 201% | 216% | 151% | 75% | 108% | -Intel Emerald Rapids | 100% | 99% | 102% | 91% | 94% | 104% | -Intel Ice Lake | 93% | 89% | 92% | 74% | 50% | 64% | -AMD Zen 4 | 86% | 75% | 83% | 60% | 41% | 52% | -AMD Zen 3 | 73% | 63% | 69% | 45% | 21% | 33% | -AMD Zen 2 | -2% | -2% | 2% | 3% | -1% | 11% | -Intel Skylake | -1% | -1% | 1% | 2% | -1% | 9% | - - | 300 | 200 | 64 | 63 | 16 | ----------------------+-------+-------+-------+-------+-------+ -AMD Zen 5 | 78% | 56% | -4% | 38% | -2% | -Intel Emerald Rapids | 61% | 55% | 4% | 32% | -5% | -Intel Ice Lake | 57% | 42% | 3% | 44% | -4% | -AMD Zen 4 | 35% | 28% | -1% | 17% | -3% | -AMD Zen 3 | 26% | 23% | -3% | 11% | -6% | -AMD Zen 2 | 13% | 24% | -1% | 14% | -3% | -Intel Skylake | 16% | 8% | -4% | 35% | -3% | - -Signed-off-by: Eric Biggers ---- - arch/x86/crypto/Makefile | 2 +- - arch/x86/crypto/aes-ctr-avx-x86_64.S | 592 +++++++++++++++++++++++ - arch/x86/crypto/aes_ctrby8_avx-x86_64.S | 597 ------------------------ - arch/x86/crypto/aesni-intel_glue.c | 404 ++++++++-------- - 4 files changed, 803 insertions(+), 792 deletions(-) - create mode 100644 arch/x86/crypto/aes-ctr-avx-x86_64.S - delete mode 100644 arch/x86/crypto/aes_ctrby8_avx-x86_64.S - ---- a/arch/x86/crypto/Makefile -+++ b/arch/x86/crypto/Makefile -@@ -48,7 +48,7 @@ chacha-x86_64-$(CONFIG_AS_AVX512) += cha - - obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o - aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o --aesni-intel-$(CONFIG_64BIT) += aes_ctrby8_avx-x86_64.o \ -+aesni-intel-$(CONFIG_64BIT) += aes-ctr-avx-x86_64.o \ - aes-gcm-aesni-x86_64.o \ - aes-xts-avx-x86_64.o - ifeq ($(CONFIG_AS_VAES)$(CONFIG_AS_VPCLMULQDQ),yy) ---- /dev/null -+++ b/arch/x86/crypto/aes-ctr-avx-x86_64.S -@@ -0,0 +1,592 @@ -+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */ -+// -+// Copyright 2025 Google LLC -+// -+// Author: Eric Biggers -+// -+// This file is dual-licensed, meaning that you can use it under your choice of -+// either of the following two licenses: -+// -+// Licensed under the Apache License 2.0 (the "License"). You may obtain a copy -+// of the License at -+// -+// https://www.apache.org/licenses/LICENSE-2.0 -+// -+// Unless required by applicable law or agreed to in writing, software -+// distributed under the License is distributed on an "AS IS" BASIS, -+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+// See the License for the specific language governing permissions and -+// limitations under the License. -+// -+// or -+// -+// Redistribution and use in source and binary forms, with or without -+// modification, are permitted provided that the following conditions are met: -+// -+// 1. Redistributions of source code must retain the above copyright notice, -+// this list of conditions and the following disclaimer. -+// -+// 2. Redistributions in binary form must reproduce the above copyright -+// notice, this list of conditions and the following disclaimer in the -+// documentation and/or other materials provided with the distribution. -+// -+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+// POSSIBILITY OF SUCH DAMAGE. -+// -+//------------------------------------------------------------------------------ -+// -+// This file contains x86_64 assembly implementations of AES-CTR and AES-XCTR -+// using the following sets of CPU features: -+// - AES-NI && AVX -+// - VAES && AVX2 -+// - VAES && (AVX10/256 || (AVX512BW && AVX512VL)) && BMI2 -+// - VAES && (AVX10/512 || (AVX512BW && AVX512VL)) && BMI2 -+// -+// See the function definitions at the bottom of the file for more information. -+ -+#include -+#include -+ -+.section .rodata -+.p2align 4 -+ -+.Lbswap_mask: -+ .octa 0x000102030405060708090a0b0c0d0e0f -+ -+.Lctr_pattern: -+ .quad 0, 0 -+.Lone: -+ .quad 1, 0 -+.Ltwo: -+ .quad 2, 0 -+ .quad 3, 0 -+ -+.Lfour: -+ .quad 4, 0 -+ -+.text -+ -+// Move a vector between memory and a register. -+// The register operand must be in the first 16 vector registers. -+.macro _vmovdqu src, dst -+.if VL < 64 -+ vmovdqu \src, \dst -+.else -+ vmovdqu8 \src, \dst -+.endif -+.endm -+ -+// Move a vector between registers. -+// The registers must be in the first 16 vector registers. -+.macro _vmovdqa src, dst -+.if VL < 64 -+ vmovdqa \src, \dst -+.else -+ vmovdqa64 \src, \dst -+.endif -+.endm -+ -+// Broadcast a 128-bit value from memory to all 128-bit lanes of a vector -+// register. The register operand must be in the first 16 vector registers. -+.macro _vbroadcast128 src, dst -+.if VL == 16 -+ vmovdqu \src, \dst -+.elseif VL == 32 -+ vbroadcasti128 \src, \dst -+.else -+ vbroadcasti32x4 \src, \dst -+.endif -+.endm -+ -+// XOR two vectors together. -+// Any register operands must be in the first 16 vector registers. -+.macro _vpxor src1, src2, dst -+.if VL < 64 -+ vpxor \src1, \src2, \dst -+.else -+ vpxord \src1, \src2, \dst -+.endif -+.endm -+ -+// Load 1 <= %ecx <= 15 bytes from the pointer \src into the xmm register \dst -+// and zeroize any remaining bytes. Clobbers %rax, %rcx, and \tmp{64,32}. -+.macro _load_partial_block src, dst, tmp64, tmp32 -+ sub $8, %ecx // LEN - 8 -+ jle .Lle8\@ -+ -+ // Load 9 <= LEN <= 15 bytes. -+ vmovq (\src), \dst // Load first 8 bytes -+ mov (\src, %rcx), %rax // Load last 8 bytes -+ neg %ecx -+ shl $3, %ecx -+ shr %cl, %rax // Discard overlapping bytes -+ vpinsrq $1, %rax, \dst, \dst -+ jmp .Ldone\@ -+ -+.Lle8\@: -+ add $4, %ecx // LEN - 4 -+ jl .Llt4\@ -+ -+ // Load 4 <= LEN <= 8 bytes. -+ mov (\src), %eax // Load first 4 bytes -+ mov (\src, %rcx), \tmp32 // Load last 4 bytes -+ jmp .Lcombine\@ -+ -+.Llt4\@: -+ // Load 1 <= LEN <= 3 bytes. -+ add $2, %ecx // LEN - 2 -+ movzbl (\src), %eax // Load first byte -+ jl .Lmovq\@ -+ movzwl (\src, %rcx), \tmp32 // Load last 2 bytes -+.Lcombine\@: -+ shl $3, %ecx -+ shl %cl, \tmp64 -+ or \tmp64, %rax // Combine the two parts -+.Lmovq\@: -+ vmovq %rax, \dst -+.Ldone\@: -+.endm -+ -+// Store 1 <= %ecx <= 15 bytes from the xmm register \src to the pointer \dst. -+// Clobbers %rax, %rcx, and \tmp{64,32}. 
-+.macro _store_partial_block src, dst, tmp64, tmp32 -+ sub $8, %ecx // LEN - 8 -+ jl .Llt8\@ -+ -+ // Store 8 <= LEN <= 15 bytes. -+ vpextrq $1, \src, %rax -+ mov %ecx, \tmp32 -+ shl $3, %ecx -+ ror %cl, %rax -+ mov %rax, (\dst, \tmp64) // Store last LEN - 8 bytes -+ vmovq \src, (\dst) // Store first 8 bytes -+ jmp .Ldone\@ -+ -+.Llt8\@: -+ add $4, %ecx // LEN - 4 -+ jl .Llt4\@ -+ -+ // Store 4 <= LEN <= 7 bytes. -+ vpextrd $1, \src, %eax -+ mov %ecx, \tmp32 -+ shl $3, %ecx -+ ror %cl, %eax -+ mov %eax, (\dst, \tmp64) // Store last LEN - 4 bytes -+ vmovd \src, (\dst) // Store first 4 bytes -+ jmp .Ldone\@ -+ -+.Llt4\@: -+ // Store 1 <= LEN <= 3 bytes. -+ vpextrb $0, \src, 0(\dst) -+ cmp $-2, %ecx // LEN - 4 == -2, i.e. LEN == 2? -+ jl .Ldone\@ -+ vpextrb $1, \src, 1(\dst) -+ je .Ldone\@ -+ vpextrb $2, \src, 2(\dst) -+.Ldone\@: -+.endm -+ -+// Prepare the next two vectors of AES inputs in AESDATA\i0 and AESDATA\i1, and -+// XOR each with the zero-th round key. Also update LE_CTR if !\final. -+.macro _prepare_2_ctr_vecs is_xctr, i0, i1, final=0 -+.if \is_xctr -+ .if USE_AVX10 -+ _vmovdqa LE_CTR, AESDATA\i0 -+ vpternlogd $0x96, XCTR_IV, RNDKEY0, AESDATA\i0 -+ .else -+ vpxor XCTR_IV, LE_CTR, AESDATA\i0 -+ vpxor RNDKEY0, AESDATA\i0, AESDATA\i0 -+ .endif -+ vpaddq LE_CTR_INC1, LE_CTR, AESDATA\i1 -+ -+ .if USE_AVX10 -+ vpternlogd $0x96, XCTR_IV, RNDKEY0, AESDATA\i1 -+ .else -+ vpxor XCTR_IV, AESDATA\i1, AESDATA\i1 -+ vpxor RNDKEY0, AESDATA\i1, AESDATA\i1 -+ .endif -+.else -+ vpshufb BSWAP_MASK, LE_CTR, AESDATA\i0 -+ _vpxor RNDKEY0, AESDATA\i0, AESDATA\i0 -+ vpaddq LE_CTR_INC1, LE_CTR, AESDATA\i1 -+ vpshufb BSWAP_MASK, AESDATA\i1, AESDATA\i1 -+ _vpxor RNDKEY0, AESDATA\i1, AESDATA\i1 -+.endif -+.if !\final -+ vpaddq LE_CTR_INC2, LE_CTR, LE_CTR -+.endif -+.endm -+ -+// Do all AES rounds on the data in the given AESDATA vectors, excluding the -+// zero-th and last rounds. -+.macro _aesenc_loop vecs:vararg -+ mov KEY, %rax -+1: -+ _vbroadcast128 (%rax), RNDKEY -+.irp i, \vecs -+ vaesenc RNDKEY, AESDATA\i, AESDATA\i -+.endr -+ add $16, %rax -+ cmp %rax, RNDKEYLAST_PTR -+ jne 1b -+.endm -+ -+// Finalize the keystream blocks in the given AESDATA vectors by doing the last -+// AES round, then XOR those keystream blocks with the corresponding data. -+// Reduce latency by doing the XOR before the vaesenclast, utilizing the -+// property vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a). -+.macro _aesenclast_and_xor vecs:vararg -+.irp i, \vecs -+ _vpxor \i*VL(SRC), RNDKEYLAST, RNDKEY -+ vaesenclast RNDKEY, AESDATA\i, AESDATA\i -+.endr -+.irp i, \vecs -+ _vmovdqu AESDATA\i, \i*VL(DST) -+.endr -+.endm -+ -+// XOR the keystream blocks in the specified AESDATA vectors with the -+// corresponding data. -+.macro _xor_data vecs:vararg -+.irp i, \vecs -+ _vpxor \i*VL(SRC), AESDATA\i, AESDATA\i -+.endr -+.irp i, \vecs -+ _vmovdqu AESDATA\i, \i*VL(DST) -+.endr -+.endm -+ -+.macro _aes_ctr_crypt is_xctr -+ -+ // Define register aliases V0-V15 that map to the xmm, ymm, or zmm -+ // registers according to the selected Vector Length (VL). 
-+.irp i, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -+ .if VL == 16 -+ .set V\i, %xmm\i -+ .elseif VL == 32 -+ .set V\i, %ymm\i -+ .elseif VL == 64 -+ .set V\i, %zmm\i -+ .else -+ .error "Unsupported Vector Length (VL)" -+ .endif -+.endr -+ -+ // Function arguments -+ .set KEY, %rdi // Initially points to the start of the -+ // crypto_aes_ctx, then is advanced to -+ // point to the index 1 round key -+ .set KEY32, %edi // Available as temp register after all -+ // keystream blocks have been generated -+ .set SRC, %rsi // Pointer to next source data -+ .set DST, %rdx // Pointer to next destination data -+ .set LEN, %ecx // Remaining length in bytes. -+ // Note: _load_partial_block relies on -+ // this being in %ecx. -+ .set LEN64, %rcx // Zero-extend LEN before using! -+ .set LEN8, %cl -+.if \is_xctr -+ .set XCTR_IV_PTR, %r8 // const u8 iv[AES_BLOCK_SIZE]; -+ .set XCTR_CTR, %r9 // u64 ctr; -+.else -+ .set LE_CTR_PTR, %r8 // const u64 le_ctr[2]; -+.endif -+ -+ // Additional local variables -+ .set RNDKEYLAST_PTR, %r10 -+ .set AESDATA0, V0 -+ .set AESDATA0_XMM, %xmm0 -+ .set AESDATA1, V1 -+ .set AESDATA1_XMM, %xmm1 -+ .set AESDATA2, V2 -+ .set AESDATA3, V3 -+ .set AESDATA4, V4 -+ .set AESDATA5, V5 -+ .set AESDATA6, V6 -+ .set AESDATA7, V7 -+.if \is_xctr -+ .set XCTR_IV, V8 -+.else -+ .set BSWAP_MASK, V8 -+.endif -+ .set LE_CTR, V9 -+ .set LE_CTR_XMM, %xmm9 -+ .set LE_CTR_INC1, V10 -+ .set LE_CTR_INC2, V11 -+ .set RNDKEY0, V12 -+ .set RNDKEYLAST, V13 -+ .set RNDKEY, V14 -+ -+ // Create the first vector of counters. -+.if \is_xctr -+ .if VL == 16 -+ vmovq XCTR_CTR, LE_CTR -+ .elseif VL == 32 -+ vmovq XCTR_CTR, LE_CTR_XMM -+ inc XCTR_CTR -+ vmovq XCTR_CTR, AESDATA0_XMM -+ vinserti128 $1, AESDATA0_XMM, LE_CTR, LE_CTR -+ .else -+ vpbroadcastq XCTR_CTR, LE_CTR -+ vpsrldq $8, LE_CTR, LE_CTR -+ vpaddq .Lctr_pattern(%rip), LE_CTR, LE_CTR -+ .endif -+ _vbroadcast128 (XCTR_IV_PTR), XCTR_IV -+.else -+ _vbroadcast128 (LE_CTR_PTR), LE_CTR -+ .if VL > 16 -+ vpaddq .Lctr_pattern(%rip), LE_CTR, LE_CTR -+ .endif -+ _vbroadcast128 .Lbswap_mask(%rip), BSWAP_MASK -+.endif -+ -+.if VL == 16 -+ _vbroadcast128 .Lone(%rip), LE_CTR_INC1 -+.elseif VL == 32 -+ _vbroadcast128 .Ltwo(%rip), LE_CTR_INC1 -+.else -+ _vbroadcast128 .Lfour(%rip), LE_CTR_INC1 -+.endif -+ vpsllq $1, LE_CTR_INC1, LE_CTR_INC2 -+ -+ // Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256). -+ movl 480(KEY), %eax -+ -+ // Compute the pointer to the last round key. -+ lea 6*16(KEY, %rax, 4), RNDKEYLAST_PTR -+ -+ // Load the zero-th and last round keys. -+ _vbroadcast128 (KEY), RNDKEY0 -+ _vbroadcast128 (RNDKEYLAST_PTR), RNDKEYLAST -+ -+ // Make KEY point to the first round key. -+ add $16, KEY -+ -+ // This is the main loop, which encrypts 8 vectors of data at a time. -+ add $-8*VL, LEN -+ jl .Lloop_8x_done\@ -+.Lloop_8x\@: -+ _prepare_2_ctr_vecs \is_xctr, 0, 1 -+ _prepare_2_ctr_vecs \is_xctr, 2, 3 -+ _prepare_2_ctr_vecs \is_xctr, 4, 5 -+ _prepare_2_ctr_vecs \is_xctr, 6, 7 -+ _aesenc_loop 0,1,2,3,4,5,6,7 -+ _aesenclast_and_xor 0,1,2,3,4,5,6,7 -+ sub $-8*VL, SRC -+ sub $-8*VL, DST -+ add $-8*VL, LEN -+ jge .Lloop_8x\@ -+.Lloop_8x_done\@: -+ sub $-8*VL, LEN -+ jz .Ldone\@ -+ -+ // 1 <= LEN < 8*VL. Generate 2, 4, or 8 more vectors of keystream -+ // blocks, depending on the remaining LEN. -+ -+ _prepare_2_ctr_vecs \is_xctr, 0, 1 -+ _prepare_2_ctr_vecs \is_xctr, 2, 3 -+ cmp $4*VL, LEN -+ jle .Lenc_tail_atmost4vecs\@ -+ -+ // 4*VL < LEN < 8*VL. Generate 8 vectors of keystream blocks. Use the -+ // first 4 to XOR 4 full vectors of data. 
Then XOR the remaining data. -+ _prepare_2_ctr_vecs \is_xctr, 4, 5 -+ _prepare_2_ctr_vecs \is_xctr, 6, 7, final=1 -+ _aesenc_loop 0,1,2,3,4,5,6,7 -+ _aesenclast_and_xor 0,1,2,3 -+ vaesenclast RNDKEYLAST, AESDATA4, AESDATA0 -+ vaesenclast RNDKEYLAST, AESDATA5, AESDATA1 -+ vaesenclast RNDKEYLAST, AESDATA6, AESDATA2 -+ vaesenclast RNDKEYLAST, AESDATA7, AESDATA3 -+ sub $-4*VL, SRC -+ sub $-4*VL, DST -+ add $-4*VL, LEN -+ cmp $1*VL-1, LEN -+ jle .Lxor_tail_partial_vec_0\@ -+ _xor_data 0 -+ cmp $2*VL-1, LEN -+ jle .Lxor_tail_partial_vec_1\@ -+ _xor_data 1 -+ cmp $3*VL-1, LEN -+ jle .Lxor_tail_partial_vec_2\@ -+ _xor_data 2 -+ cmp $4*VL-1, LEN -+ jle .Lxor_tail_partial_vec_3\@ -+ _xor_data 3 -+ jmp .Ldone\@ -+ -+.Lenc_tail_atmost4vecs\@: -+ cmp $2*VL, LEN -+ jle .Lenc_tail_atmost2vecs\@ -+ -+ // 2*VL < LEN <= 4*VL. Generate 4 vectors of keystream blocks. Use the -+ // first 2 to XOR 2 full vectors of data. Then XOR the remaining data. -+ _aesenc_loop 0,1,2,3 -+ _aesenclast_and_xor 0,1 -+ vaesenclast RNDKEYLAST, AESDATA2, AESDATA0 -+ vaesenclast RNDKEYLAST, AESDATA3, AESDATA1 -+ sub $-2*VL, SRC -+ sub $-2*VL, DST -+ add $-2*VL, LEN -+ jmp .Lxor_tail_upto2vecs\@ -+ -+.Lenc_tail_atmost2vecs\@: -+ // 1 <= LEN <= 2*VL. Generate 2 vectors of keystream blocks. Then XOR -+ // the remaining data. -+ _aesenc_loop 0,1 -+ vaesenclast RNDKEYLAST, AESDATA0, AESDATA0 -+ vaesenclast RNDKEYLAST, AESDATA1, AESDATA1 -+ -+.Lxor_tail_upto2vecs\@: -+ cmp $1*VL-1, LEN -+ jle .Lxor_tail_partial_vec_0\@ -+ _xor_data 0 -+ cmp $2*VL-1, LEN -+ jle .Lxor_tail_partial_vec_1\@ -+ _xor_data 1 -+ jmp .Ldone\@ -+ -+.Lxor_tail_partial_vec_1\@: -+ add $-1*VL, LEN -+ jz .Ldone\@ -+ sub $-1*VL, SRC -+ sub $-1*VL, DST -+ _vmovdqa AESDATA1, AESDATA0 -+ jmp .Lxor_tail_partial_vec_0\@ -+ -+.Lxor_tail_partial_vec_2\@: -+ add $-2*VL, LEN -+ jz .Ldone\@ -+ sub $-2*VL, SRC -+ sub $-2*VL, DST -+ _vmovdqa AESDATA2, AESDATA0 -+ jmp .Lxor_tail_partial_vec_0\@ -+ -+.Lxor_tail_partial_vec_3\@: -+ add $-3*VL, LEN -+ jz .Ldone\@ -+ sub $-3*VL, SRC -+ sub $-3*VL, DST -+ _vmovdqa AESDATA3, AESDATA0 -+ -+.Lxor_tail_partial_vec_0\@: -+ // XOR the remaining 1 <= LEN < VL bytes. It's easy if masked -+ // loads/stores are available; otherwise it's a bit harder... -+.if USE_AVX10 -+ .if VL <= 32 -+ mov $-1, %eax -+ bzhi LEN, %eax, %eax -+ kmovd %eax, %k1 -+ .else -+ mov $-1, %rax -+ bzhi LEN64, %rax, %rax -+ kmovq %rax, %k1 -+ .endif -+ vmovdqu8 (SRC), AESDATA1{%k1}{z} -+ _vpxor AESDATA1, AESDATA0, AESDATA0 -+ vmovdqu8 AESDATA0, (DST){%k1} -+.else -+ .if VL == 32 -+ cmp $16, LEN -+ jl 1f -+ vpxor (SRC), AESDATA0_XMM, AESDATA1_XMM -+ vmovdqu AESDATA1_XMM, (DST) -+ add $16, SRC -+ add $16, DST -+ sub $16, LEN -+ jz .Ldone\@ -+ vextracti128 $1, AESDATA0, AESDATA0_XMM -+1: -+ .endif -+ mov LEN, %r10d -+ _load_partial_block SRC, AESDATA1_XMM, KEY, KEY32 -+ vpxor AESDATA1_XMM, AESDATA0_XMM, AESDATA0_XMM -+ mov %r10d, %ecx -+ _store_partial_block AESDATA0_XMM, DST, KEY, KEY32 -+.endif -+ -+.Ldone\@: -+.if VL > 16 -+ vzeroupper -+.endif -+ RET -+.endm -+ -+// Below are the definitions of the functions generated by the above macro. 
-+// They have the following prototypes: -+// -+// -+// void aes_ctr64_crypt_##suffix(const struct crypto_aes_ctx *key, -+// const u8 *src, u8 *dst, int len, -+// const u64 le_ctr[2]); -+// -+// void aes_xctr_crypt_##suffix(const struct crypto_aes_ctx *key, -+// const u8 *src, u8 *dst, int len, -+// const u8 iv[AES_BLOCK_SIZE], u64 ctr); -+// -+// Both functions generate |len| bytes of keystream, XOR it with the data from -+// |src|, and write the result to |dst|. On non-final calls, |len| must be a -+// multiple of 16. On the final call, |len| can be any value. -+// -+// aes_ctr64_crypt_* implement "regular" CTR, where the keystream is generated -+// from a 128-bit big endian counter that increments by 1 for each AES block. -+// HOWEVER, to keep the assembly code simple, some of the counter management is -+// left to the caller. aes_ctr64_crypt_* take the counter in little endian -+// form, only increment the low 64 bits internally, do the conversion to big -+// endian internally, and don't write the updated counter back to memory. The -+// caller is responsible for converting the starting IV to the little endian -+// le_ctr, detecting the (very rare) case of a carry out of the low 64 bits -+// being needed and splitting at that point with a carry done in between, and -+// updating le_ctr after each part if the message is multi-part. -+// -+// aes_xctr_crypt_* implement XCTR as specified in "Length-preserving encryption -+// with HCTR2" (https://eprint.iacr.org/2021/1441.pdf). XCTR is an -+// easier-to-implement variant of CTR that uses little endian byte order and -+// eliminates carries. |ctr| is the per-message block counter starting at 1. -+ -+.set VL, 16 -+.set USE_AVX10, 0 -+SYM_TYPED_FUNC_START(aes_ctr64_crypt_aesni_avx) -+ _aes_ctr_crypt 0 -+SYM_FUNC_END(aes_ctr64_crypt_aesni_avx) -+SYM_TYPED_FUNC_START(aes_xctr_crypt_aesni_avx) -+ _aes_ctr_crypt 1 -+SYM_FUNC_END(aes_xctr_crypt_aesni_avx) -+ -+#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) -+.set VL, 32 -+.set USE_AVX10, 0 -+SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx2) -+ _aes_ctr_crypt 0 -+SYM_FUNC_END(aes_ctr64_crypt_vaes_avx2) -+SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx2) -+ _aes_ctr_crypt 1 -+SYM_FUNC_END(aes_xctr_crypt_vaes_avx2) -+ -+.set VL, 32 -+.set USE_AVX10, 1 -+SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx10_256) -+ _aes_ctr_crypt 0 -+SYM_FUNC_END(aes_ctr64_crypt_vaes_avx10_256) -+SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx10_256) -+ _aes_ctr_crypt 1 -+SYM_FUNC_END(aes_xctr_crypt_vaes_avx10_256) -+ -+.set VL, 64 -+.set USE_AVX10, 1 -+SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx10_512) -+ _aes_ctr_crypt 0 -+SYM_FUNC_END(aes_ctr64_crypt_vaes_avx10_512) -+SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx10_512) -+ _aes_ctr_crypt 1 -+SYM_FUNC_END(aes_xctr_crypt_vaes_avx10_512) -+#endif // CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ ---- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S -+++ /dev/null -@@ -1,597 +0,0 @@ --/* SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause */ --/* -- * AES CTR mode by8 optimization with AVX instructions. (x86_64) -- * -- * Copyright(c) 2014 Intel Corporation. -- * -- * Contact Information: -- * James Guilford -- * Sean Gulley -- * Chandramouli Narayanan -- */ --/* -- * This is AES128/192/256 CTR mode optimization implementation. It requires -- * the support of Intel(R) AESNI and AVX instructions. -- * -- * This work was inspired by the AES CTR mode optimization published -- * in Intel Optimized IPSEC Cryptographic library. 
-- * Additional information on it can be found at: -- * https://github.com/intel/intel-ipsec-mb -- */ -- --#include -- --#define VMOVDQ vmovdqu -- --/* -- * Note: the "x" prefix in these aliases means "this is an xmm register". The -- * alias prefixes have no relation to XCTR where the "X" prefix means "XOR -- * counter". -- */ --#define xdata0 %xmm0 --#define xdata1 %xmm1 --#define xdata2 %xmm2 --#define xdata3 %xmm3 --#define xdata4 %xmm4 --#define xdata5 %xmm5 --#define xdata6 %xmm6 --#define xdata7 %xmm7 --#define xcounter %xmm8 // CTR mode only --#define xiv %xmm8 // XCTR mode only --#define xbyteswap %xmm9 // CTR mode only --#define xtmp %xmm9 // XCTR mode only --#define xkey0 %xmm10 --#define xkey4 %xmm11 --#define xkey8 %xmm12 --#define xkey12 %xmm13 --#define xkeyA %xmm14 --#define xkeyB %xmm15 -- --#define p_in %rdi --#define p_iv %rsi --#define p_keys %rdx --#define p_out %rcx --#define num_bytes %r8 --#define counter %r9 // XCTR mode only --#define tmp %r10 --#define DDQ_DATA 0 --#define XDATA 1 --#define KEY_128 1 --#define KEY_192 2 --#define KEY_256 3 -- --.section .rodata --.align 16 -- --byteswap_const: -- .octa 0x000102030405060708090A0B0C0D0E0F --ddq_low_msk: -- .octa 0x0000000000000000FFFFFFFFFFFFFFFF --ddq_high_add_1: -- .octa 0x00000000000000010000000000000000 --ddq_add_1: -- .octa 0x00000000000000000000000000000001 --ddq_add_2: -- .octa 0x00000000000000000000000000000002 --ddq_add_3: -- .octa 0x00000000000000000000000000000003 --ddq_add_4: -- .octa 0x00000000000000000000000000000004 --ddq_add_5: -- .octa 0x00000000000000000000000000000005 --ddq_add_6: -- .octa 0x00000000000000000000000000000006 --ddq_add_7: -- .octa 0x00000000000000000000000000000007 --ddq_add_8: -- .octa 0x00000000000000000000000000000008 -- --.text -- --/* generate a unique variable for ddq_add_x */ -- --/* generate a unique variable for xmm register */ --.macro setxdata n -- var_xdata = %xmm\n --.endm -- --/* club the numeric 'id' to the symbol 'name' */ -- --.macro club name, id --.altmacro -- .if \name == XDATA -- setxdata %\id -- .endif --.noaltmacro --.endm -- --/* -- * do_aes num_in_par load_keys key_len -- * This increments p_in, but not p_out -- */ --.macro do_aes b, k, key_len, xctr -- .set by, \b -- .set load_keys, \k -- .set klen, \key_len -- -- .if (load_keys) -- vmovdqa 0*16(p_keys), xkey0 -- .endif -- -- .if \xctr -- movq counter, xtmp -- .set i, 0 -- .rept (by) -- club XDATA, i -- vpaddq (ddq_add_1 + 16 * i)(%rip), xtmp, var_xdata -- .set i, (i +1) -- .endr -- .set i, 0 -- .rept (by) -- club XDATA, i -- vpxor xiv, var_xdata, var_xdata -- .set i, (i +1) -- .endr -- .else -- vpshufb xbyteswap, xcounter, xdata0 -- .set i, 1 -- .rept (by - 1) -- club XDATA, i -- vpaddq (ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata -- vptest ddq_low_msk(%rip), var_xdata -- jnz 1f -- vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata -- vpaddq ddq_high_add_1(%rip), xcounter, xcounter -- 1: -- vpshufb xbyteswap, var_xdata, var_xdata -- .set i, (i +1) -- .endr -- .endif -- -- vmovdqa 1*16(p_keys), xkeyA -- -- vpxor xkey0, xdata0, xdata0 -- .if \xctr -- add $by, counter -- .else -- vpaddq (ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter -- vptest ddq_low_msk(%rip), xcounter -- jnz 1f -- vpaddq ddq_high_add_1(%rip), xcounter, xcounter -- 1: -- .endif -- -- .set i, 1 -- .rept (by - 1) -- club XDATA, i -- vpxor xkey0, var_xdata, var_xdata -- .set i, (i +1) -- .endr -- -- vmovdqa 2*16(p_keys), xkeyB -- -- .set i, 0 -- .rept by -- club XDATA, i -- vaesenc xkeyA, var_xdata, var_xdata /* key 1 */ -- .set 
i, (i +1) -- .endr -- -- .if (klen == KEY_128) -- .if (load_keys) -- vmovdqa 3*16(p_keys), xkey4 -- .endif -- .else -- vmovdqa 3*16(p_keys), xkeyA -- .endif -- -- .set i, 0 -- .rept by -- club XDATA, i -- vaesenc xkeyB, var_xdata, var_xdata /* key 2 */ -- .set i, (i +1) -- .endr -- -- add $(16*by), p_in -- -- .if (klen == KEY_128) -- vmovdqa 4*16(p_keys), xkeyB -- .else -- .if (load_keys) -- vmovdqa 4*16(p_keys), xkey4 -- .endif -- .endif -- -- .set i, 0 -- .rept by -- club XDATA, i -- /* key 3 */ -- .if (klen == KEY_128) -- vaesenc xkey4, var_xdata, var_xdata -- .else -- vaesenc xkeyA, var_xdata, var_xdata -- .endif -- .set i, (i +1) -- .endr -- -- vmovdqa 5*16(p_keys), xkeyA -- -- .set i, 0 -- .rept by -- club XDATA, i -- /* key 4 */ -- .if (klen == KEY_128) -- vaesenc xkeyB, var_xdata, var_xdata -- .else -- vaesenc xkey4, var_xdata, var_xdata -- .endif -- .set i, (i +1) -- .endr -- -- .if (klen == KEY_128) -- .if (load_keys) -- vmovdqa 6*16(p_keys), xkey8 -- .endif -- .else -- vmovdqa 6*16(p_keys), xkeyB -- .endif -- -- .set i, 0 -- .rept by -- club XDATA, i -- vaesenc xkeyA, var_xdata, var_xdata /* key 5 */ -- .set i, (i +1) -- .endr -- -- vmovdqa 7*16(p_keys), xkeyA -- -- .set i, 0 -- .rept by -- club XDATA, i -- /* key 6 */ -- .if (klen == KEY_128) -- vaesenc xkey8, var_xdata, var_xdata -- .else -- vaesenc xkeyB, var_xdata, var_xdata -- .endif -- .set i, (i +1) -- .endr -- -- .if (klen == KEY_128) -- vmovdqa 8*16(p_keys), xkeyB -- .else -- .if (load_keys) -- vmovdqa 8*16(p_keys), xkey8 -- .endif -- .endif -- -- .set i, 0 -- .rept by -- club XDATA, i -- vaesenc xkeyA, var_xdata, var_xdata /* key 7 */ -- .set i, (i +1) -- .endr -- -- .if (klen == KEY_128) -- .if (load_keys) -- vmovdqa 9*16(p_keys), xkey12 -- .endif -- .else -- vmovdqa 9*16(p_keys), xkeyA -- .endif -- -- .set i, 0 -- .rept by -- club XDATA, i -- /* key 8 */ -- .if (klen == KEY_128) -- vaesenc xkeyB, var_xdata, var_xdata -- .else -- vaesenc xkey8, var_xdata, var_xdata -- .endif -- .set i, (i +1) -- .endr -- -- vmovdqa 10*16(p_keys), xkeyB -- -- .set i, 0 -- .rept by -- club XDATA, i -- /* key 9 */ -- .if (klen == KEY_128) -- vaesenc xkey12, var_xdata, var_xdata -- .else -- vaesenc xkeyA, var_xdata, var_xdata -- .endif -- .set i, (i +1) -- .endr -- -- .if (klen != KEY_128) -- vmovdqa 11*16(p_keys), xkeyA -- .endif -- -- .set i, 0 -- .rept by -- club XDATA, i -- /* key 10 */ -- .if (klen == KEY_128) -- vaesenclast xkeyB, var_xdata, var_xdata -- .else -- vaesenc xkeyB, var_xdata, var_xdata -- .endif -- .set i, (i +1) -- .endr -- -- .if (klen != KEY_128) -- .if (load_keys) -- vmovdqa 12*16(p_keys), xkey12 -- .endif -- -- .set i, 0 -- .rept by -- club XDATA, i -- vaesenc xkeyA, var_xdata, var_xdata /* key 11 */ -- .set i, (i +1) -- .endr -- -- .if (klen == KEY_256) -- vmovdqa 13*16(p_keys), xkeyA -- .endif -- -- .set i, 0 -- .rept by -- club XDATA, i -- .if (klen == KEY_256) -- /* key 12 */ -- vaesenc xkey12, var_xdata, var_xdata -- .else -- vaesenclast xkey12, var_xdata, var_xdata -- .endif -- .set i, (i +1) -- .endr -- -- .if (klen == KEY_256) -- vmovdqa 14*16(p_keys), xkeyB -- -- .set i, 0 -- .rept by -- club XDATA, i -- /* key 13 */ -- vaesenc xkeyA, var_xdata, var_xdata -- .set i, (i +1) -- .endr -- -- .set i, 0 -- .rept by -- club XDATA, i -- /* key 14 */ -- vaesenclast xkeyB, var_xdata, var_xdata -- .set i, (i +1) -- .endr -- .endif -- .endif -- -- .set i, 0 -- .rept (by / 2) -- .set j, (i+1) -- VMOVDQ (i*16 - 16*by)(p_in), xkeyA -- VMOVDQ (j*16 - 16*by)(p_in), xkeyB -- club XDATA, i -- vpxor xkeyA, var_xdata, 
var_xdata -- club XDATA, j -- vpxor xkeyB, var_xdata, var_xdata -- .set i, (i+2) -- .endr -- -- .if (i < by) -- VMOVDQ (i*16 - 16*by)(p_in), xkeyA -- club XDATA, i -- vpxor xkeyA, var_xdata, var_xdata -- .endif -- -- .set i, 0 -- .rept by -- club XDATA, i -- VMOVDQ var_xdata, i*16(p_out) -- .set i, (i+1) -- .endr --.endm -- --.macro do_aes_load val, key_len, xctr -- do_aes \val, 1, \key_len, \xctr --.endm -- --.macro do_aes_noload val, key_len, xctr -- do_aes \val, 0, \key_len, \xctr --.endm -- --/* main body of aes ctr load */ -- --.macro do_aes_ctrmain key_len, xctr -- cmp $16, num_bytes -- jb .Ldo_return2\xctr\key_len -- -- .if \xctr -- shr $4, counter -- vmovdqu (p_iv), xiv -- .else -- vmovdqa byteswap_const(%rip), xbyteswap -- vmovdqu (p_iv), xcounter -- vpshufb xbyteswap, xcounter, xcounter -- .endif -- -- mov num_bytes, tmp -- and $(7*16), tmp -- jz .Lmult_of_8_blks\xctr\key_len -- -- /* 1 <= tmp <= 7 */ -- cmp $(4*16), tmp -- jg .Lgt4\xctr\key_len -- je .Leq4\xctr\key_len -- --.Llt4\xctr\key_len: -- cmp $(2*16), tmp -- jg .Leq3\xctr\key_len -- je .Leq2\xctr\key_len -- --.Leq1\xctr\key_len: -- do_aes_load 1, \key_len, \xctr -- add $(1*16), p_out -- and $(~7*16), num_bytes -- jz .Ldo_return2\xctr\key_len -- jmp .Lmain_loop2\xctr\key_len -- --.Leq2\xctr\key_len: -- do_aes_load 2, \key_len, \xctr -- add $(2*16), p_out -- and $(~7*16), num_bytes -- jz .Ldo_return2\xctr\key_len -- jmp .Lmain_loop2\xctr\key_len -- -- --.Leq3\xctr\key_len: -- do_aes_load 3, \key_len, \xctr -- add $(3*16), p_out -- and $(~7*16), num_bytes -- jz .Ldo_return2\xctr\key_len -- jmp .Lmain_loop2\xctr\key_len -- --.Leq4\xctr\key_len: -- do_aes_load 4, \key_len, \xctr -- add $(4*16), p_out -- and $(~7*16), num_bytes -- jz .Ldo_return2\xctr\key_len -- jmp .Lmain_loop2\xctr\key_len -- --.Lgt4\xctr\key_len: -- cmp $(6*16), tmp -- jg .Leq7\xctr\key_len -- je .Leq6\xctr\key_len -- --.Leq5\xctr\key_len: -- do_aes_load 5, \key_len, \xctr -- add $(5*16), p_out -- and $(~7*16), num_bytes -- jz .Ldo_return2\xctr\key_len -- jmp .Lmain_loop2\xctr\key_len -- --.Leq6\xctr\key_len: -- do_aes_load 6, \key_len, \xctr -- add $(6*16), p_out -- and $(~7*16), num_bytes -- jz .Ldo_return2\xctr\key_len -- jmp .Lmain_loop2\xctr\key_len -- --.Leq7\xctr\key_len: -- do_aes_load 7, \key_len, \xctr -- add $(7*16), p_out -- and $(~7*16), num_bytes -- jz .Ldo_return2\xctr\key_len -- jmp .Lmain_loop2\xctr\key_len -- --.Lmult_of_8_blks\xctr\key_len: -- .if (\key_len != KEY_128) -- vmovdqa 0*16(p_keys), xkey0 -- vmovdqa 4*16(p_keys), xkey4 -- vmovdqa 8*16(p_keys), xkey8 -- vmovdqa 12*16(p_keys), xkey12 -- .else -- vmovdqa 0*16(p_keys), xkey0 -- vmovdqa 3*16(p_keys), xkey4 -- vmovdqa 6*16(p_keys), xkey8 -- vmovdqa 9*16(p_keys), xkey12 -- .endif --.align 16 --.Lmain_loop2\xctr\key_len: -- /* num_bytes is a multiple of 8 and >0 */ -- do_aes_noload 8, \key_len, \xctr -- add $(8*16), p_out -- sub $(8*16), num_bytes -- jne .Lmain_loop2\xctr\key_len -- --.Ldo_return2\xctr\key_len: -- .if !\xctr -- /* return updated IV */ -- vpshufb xbyteswap, xcounter, xcounter -- vmovdqu xcounter, (p_iv) -- .endif -- RET --.endm -- --/* -- * routine to do AES128 CTR enc/decrypt "by8" -- * XMM registers are clobbered. 
-- * Saving/restoring must be done at a higher level -- * aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out, -- * unsigned int num_bytes) -- */ --SYM_FUNC_START(aes_ctr_enc_128_avx_by8) -- /* call the aes main loop */ -- do_aes_ctrmain KEY_128 0 -- --SYM_FUNC_END(aes_ctr_enc_128_avx_by8) -- --/* -- * routine to do AES192 CTR enc/decrypt "by8" -- * XMM registers are clobbered. -- * Saving/restoring must be done at a higher level -- * aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out, -- * unsigned int num_bytes) -- */ --SYM_FUNC_START(aes_ctr_enc_192_avx_by8) -- /* call the aes main loop */ -- do_aes_ctrmain KEY_192 0 -- --SYM_FUNC_END(aes_ctr_enc_192_avx_by8) -- --/* -- * routine to do AES256 CTR enc/decrypt "by8" -- * XMM registers are clobbered. -- * Saving/restoring must be done at a higher level -- * aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out, -- * unsigned int num_bytes) -- */ --SYM_FUNC_START(aes_ctr_enc_256_avx_by8) -- /* call the aes main loop */ -- do_aes_ctrmain KEY_256 0 -- --SYM_FUNC_END(aes_ctr_enc_256_avx_by8) -- --/* -- * routine to do AES128 XCTR enc/decrypt "by8" -- * XMM registers are clobbered. -- * Saving/restoring must be done at a higher level -- * aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv, const void *keys, -- * u8* out, unsigned int num_bytes, unsigned int byte_ctr) -- */ --SYM_FUNC_START(aes_xctr_enc_128_avx_by8) -- /* call the aes main loop */ -- do_aes_ctrmain KEY_128 1 -- --SYM_FUNC_END(aes_xctr_enc_128_avx_by8) -- --/* -- * routine to do AES192 XCTR enc/decrypt "by8" -- * XMM registers are clobbered. -- * Saving/restoring must be done at a higher level -- * aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv, const void *keys, -- * u8* out, unsigned int num_bytes, unsigned int byte_ctr) -- */ --SYM_FUNC_START(aes_xctr_enc_192_avx_by8) -- /* call the aes main loop */ -- do_aes_ctrmain KEY_192 1 -- --SYM_FUNC_END(aes_xctr_enc_192_avx_by8) -- --/* -- * routine to do AES256 XCTR enc/decrypt "by8" -- * XMM registers are clobbered. 
-- * Saving/restoring must be done at a higher level -- * aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv, const void *keys, -- * u8* out, unsigned int num_bytes, unsigned int byte_ctr) -- */ --SYM_FUNC_START(aes_xctr_enc_256_avx_by8) -- /* call the aes main loop */ -- do_aes_ctrmain KEY_256 1 -- --SYM_FUNC_END(aes_xctr_enc_256_avx_by8) ---- a/arch/x86/crypto/aesni-intel_glue.c -+++ b/arch/x86/crypto/aesni-intel_glue.c -@@ -23,7 +23,6 @@ - #include - #include - #include --#include - #include - #include - #include -@@ -82,30 +81,8 @@ asmlinkage void aesni_xts_dec(const stru - const u8 *in, unsigned int len, u8 *iv); - - #ifdef CONFIG_X86_64 -- - asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, unsigned int len, u8 *iv); --DEFINE_STATIC_CALL(aesni_ctr_enc_tfm, aesni_ctr_enc); -- --asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv, -- void *keys, u8 *out, unsigned int num_bytes); --asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv, -- void *keys, u8 *out, unsigned int num_bytes); --asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv, -- void *keys, u8 *out, unsigned int num_bytes); -- -- --asmlinkage void aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv, -- const void *keys, u8 *out, unsigned int num_bytes, -- unsigned int byte_ctr); -- --asmlinkage void aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv, -- const void *keys, u8 *out, unsigned int num_bytes, -- unsigned int byte_ctr); -- --asmlinkage void aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv, -- const void *keys, u8 *out, unsigned int num_bytes, -- unsigned int byte_ctr); - #endif - - static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) -@@ -376,24 +353,8 @@ static int cts_cbc_decrypt(struct skciph - } - - #ifdef CONFIG_X86_64 --static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, -- const u8 *in, unsigned int len, u8 *iv) --{ -- /* -- * based on key length, override with the by8 version -- * of ctr mode encryption/decryption for improved performance -- * aes_set_key_common() ensures that key length is one of -- * {128,192,256} -- */ -- if (ctx->key_length == AES_KEYSIZE_128) -- aes_ctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len); -- else if (ctx->key_length == AES_KEYSIZE_192) -- aes_ctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len); -- else -- aes_ctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len); --} -- --static int ctr_crypt(struct skcipher_request *req) -+/* This is the non-AVX version. 
*/ -+static int ctr_crypt_aesni(struct skcipher_request *req) - { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); -@@ -407,10 +368,9 @@ static int ctr_crypt(struct skcipher_req - while ((nbytes = walk.nbytes) > 0) { - kernel_fpu_begin(); - if (nbytes & AES_BLOCK_MASK) -- static_call(aesni_ctr_enc_tfm)(ctx, walk.dst.virt.addr, -- walk.src.virt.addr, -- nbytes & AES_BLOCK_MASK, -- walk.iv); -+ aesni_ctr_enc(ctx, walk.dst.virt.addr, -+ walk.src.virt.addr, -+ nbytes & AES_BLOCK_MASK, walk.iv); - nbytes &= ~AES_BLOCK_MASK; - - if (walk.nbytes == walk.total && nbytes > 0) { -@@ -426,59 +386,6 @@ static int ctr_crypt(struct skcipher_req - } - return err; - } -- --static void aesni_xctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, -- const u8 *in, unsigned int len, u8 *iv, -- unsigned int byte_ctr) --{ -- if (ctx->key_length == AES_KEYSIZE_128) -- aes_xctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len, -- byte_ctr); -- else if (ctx->key_length == AES_KEYSIZE_192) -- aes_xctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len, -- byte_ctr); -- else -- aes_xctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len, -- byte_ctr); --} -- --static int xctr_crypt(struct skcipher_request *req) --{ -- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -- struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); -- u8 keystream[AES_BLOCK_SIZE]; -- struct skcipher_walk walk; -- unsigned int nbytes; -- unsigned int byte_ctr = 0; -- int err; -- __le32 block[AES_BLOCK_SIZE / sizeof(__le32)]; -- -- err = skcipher_walk_virt(&walk, req, false); -- -- while ((nbytes = walk.nbytes) > 0) { -- kernel_fpu_begin(); -- if (nbytes & AES_BLOCK_MASK) -- aesni_xctr_enc_avx_tfm(ctx, walk.dst.virt.addr, -- walk.src.virt.addr, nbytes & AES_BLOCK_MASK, -- walk.iv, byte_ctr); -- nbytes &= ~AES_BLOCK_MASK; -- byte_ctr += walk.nbytes - nbytes; -- -- if (walk.nbytes == walk.total && nbytes > 0) { -- memcpy(block, walk.iv, AES_BLOCK_SIZE); -- block[0] ^= cpu_to_le32(1 + byte_ctr / AES_BLOCK_SIZE); -- aesni_enc(ctx, keystream, (u8 *)block); -- crypto_xor_cpy(walk.dst.virt.addr + walk.nbytes - -- nbytes, walk.src.virt.addr + walk.nbytes -- - nbytes, keystream, nbytes); -- byte_ctr += nbytes; -- nbytes = 0; -- } -- kernel_fpu_end(); -- err = skcipher_walk_done(&walk, nbytes); -- } -- return err; --} - #endif - - static int xts_setkey_aesni(struct crypto_skcipher *tfm, const u8 *key, -@@ -721,8 +628,8 @@ static struct skcipher_alg aesni_skciphe - .ivsize = AES_BLOCK_SIZE, - .chunksize = AES_BLOCK_SIZE, - .setkey = aesni_skcipher_setkey, -- .encrypt = ctr_crypt, -- .decrypt = ctr_crypt, -+ .encrypt = ctr_crypt_aesni, -+ .decrypt = ctr_crypt_aesni, - #endif - }, { - .base = { -@@ -748,35 +655,105 @@ static - struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)]; - - #ifdef CONFIG_X86_64 --/* -- * XCTR does not have a non-AVX implementation, so it must be enabled -- * conditionally. 
-- */ --static struct skcipher_alg aesni_xctr = { -- .base = { -- .cra_name = "__xctr(aes)", -- .cra_driver_name = "__xctr-aes-aesni", -- .cra_priority = 400, -- .cra_flags = CRYPTO_ALG_INTERNAL, -- .cra_blocksize = 1, -- .cra_ctxsize = CRYPTO_AES_CTX_SIZE, -- .cra_module = THIS_MODULE, -- }, -- .min_keysize = AES_MIN_KEY_SIZE, -- .max_keysize = AES_MAX_KEY_SIZE, -- .ivsize = AES_BLOCK_SIZE, -- .chunksize = AES_BLOCK_SIZE, -- .setkey = aesni_skcipher_setkey, -- .encrypt = xctr_crypt, -- .decrypt = xctr_crypt, --}; -- --static struct simd_skcipher_alg *aesni_simd_xctr; -- - asmlinkage void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, - u8 iv[AES_BLOCK_SIZE]); - --#define DEFINE_XTS_ALG(suffix, driver_name, priority) \ -+/* __always_inline to avoid indirect call */ -+static __always_inline int -+ctr_crypt(struct skcipher_request *req, -+ void (*ctr64_func)(const struct crypto_aes_ctx *key, -+ const u8 *src, u8 *dst, int len, -+ const u64 le_ctr[2])) -+{ -+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -+ const struct crypto_aes_ctx *key = aes_ctx(crypto_skcipher_ctx(tfm)); -+ unsigned int nbytes, p1_nbytes, nblocks; -+ struct skcipher_walk walk; -+ u64 le_ctr[2]; -+ u64 ctr64; -+ int err; -+ -+ ctr64 = le_ctr[0] = get_unaligned_be64(&req->iv[8]); -+ le_ctr[1] = get_unaligned_be64(&req->iv[0]); -+ -+ err = skcipher_walk_virt(&walk, req, false); -+ -+ while ((nbytes = walk.nbytes) != 0) { -+ if (nbytes < walk.total) { -+ /* Not the end yet, so keep the length block-aligned. */ -+ nbytes = round_down(nbytes, AES_BLOCK_SIZE); -+ nblocks = nbytes / AES_BLOCK_SIZE; -+ } else { -+ /* It's the end, so include any final partial block. */ -+ nblocks = DIV_ROUND_UP(nbytes, AES_BLOCK_SIZE); -+ } -+ ctr64 += nblocks; -+ -+ kernel_fpu_begin(); -+ if (likely(ctr64 >= nblocks)) { -+ /* The low 64 bits of the counter won't overflow. */ -+ (*ctr64_func)(key, walk.src.virt.addr, -+ walk.dst.virt.addr, nbytes, le_ctr); -+ } else { -+ /* -+ * The low 64 bits of the counter will overflow. The -+ * assembly doesn't handle this case, so split the -+ * operation into two at the point where the overflow -+ * will occur. After the first part, add the carry bit. 
-+ */ -+ p1_nbytes = min_t(unsigned int, nbytes, -+ (nblocks - ctr64) * AES_BLOCK_SIZE); -+ (*ctr64_func)(key, walk.src.virt.addr, -+ walk.dst.virt.addr, p1_nbytes, le_ctr); -+ le_ctr[0] = 0; -+ le_ctr[1]++; -+ (*ctr64_func)(key, walk.src.virt.addr + p1_nbytes, -+ walk.dst.virt.addr + p1_nbytes, -+ nbytes - p1_nbytes, le_ctr); -+ } -+ kernel_fpu_end(); -+ le_ctr[0] = ctr64; -+ -+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes); -+ } -+ -+ put_unaligned_be64(ctr64, &req->iv[8]); -+ put_unaligned_be64(le_ctr[1], &req->iv[0]); -+ -+ return err; -+} -+ -+/* __always_inline to avoid indirect call */ -+static __always_inline int -+xctr_crypt(struct skcipher_request *req, -+ void (*xctr_func)(const struct crypto_aes_ctx *key, -+ const u8 *src, u8 *dst, int len, -+ const u8 iv[AES_BLOCK_SIZE], u64 ctr)) -+{ -+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -+ const struct crypto_aes_ctx *key = aes_ctx(crypto_skcipher_ctx(tfm)); -+ struct skcipher_walk walk; -+ unsigned int nbytes; -+ u64 ctr = 1; -+ int err; -+ -+ err = skcipher_walk_virt(&walk, req, false); -+ while ((nbytes = walk.nbytes) != 0) { -+ if (nbytes < walk.total) -+ nbytes = round_down(nbytes, AES_BLOCK_SIZE); -+ -+ kernel_fpu_begin(); -+ (*xctr_func)(key, walk.src.virt.addr, walk.dst.virt.addr, -+ nbytes, req->iv, ctr); -+ kernel_fpu_end(); -+ -+ ctr += DIV_ROUND_UP(nbytes, AES_BLOCK_SIZE); -+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes); -+ } -+ return err; -+} -+ -+#define DEFINE_AVX_SKCIPHER_ALGS(suffix, driver_name_suffix, priority) \ - \ - asmlinkage void \ - aes_xts_encrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src, \ -@@ -795,32 +772,80 @@ static int xts_decrypt_##suffix(struct s - return xts_crypt(req, aes_xts_encrypt_iv, aes_xts_decrypt_##suffix); \ - } \ - \ --static struct skcipher_alg aes_xts_alg_##suffix = { \ -- .base = { \ -- .cra_name = "__xts(aes)", \ -- .cra_driver_name = "__" driver_name, \ -- .cra_priority = priority, \ -- .cra_flags = CRYPTO_ALG_INTERNAL, \ -- .cra_blocksize = AES_BLOCK_SIZE, \ -- .cra_ctxsize = XTS_AES_CTX_SIZE, \ -- .cra_module = THIS_MODULE, \ -- }, \ -- .min_keysize = 2 * AES_MIN_KEY_SIZE, \ -- .max_keysize = 2 * AES_MAX_KEY_SIZE, \ -- .ivsize = AES_BLOCK_SIZE, \ -- .walksize = 2 * AES_BLOCK_SIZE, \ -- .setkey = xts_setkey_aesni, \ -- .encrypt = xts_encrypt_##suffix, \ -- .decrypt = xts_decrypt_##suffix, \ --}; \ -+asmlinkage void \ -+aes_ctr64_crypt_##suffix(const struct crypto_aes_ctx *key, \ -+ const u8 *src, u8 *dst, int len, const u64 le_ctr[2]);\ -+ \ -+static int ctr_crypt_##suffix(struct skcipher_request *req) \ -+{ \ -+ return ctr_crypt(req, aes_ctr64_crypt_##suffix); \ -+} \ -+ \ -+asmlinkage void \ -+aes_xctr_crypt_##suffix(const struct crypto_aes_ctx *key, \ -+ const u8 *src, u8 *dst, int len, \ -+ const u8 iv[AES_BLOCK_SIZE], u64 ctr); \ - \ --static struct simd_skcipher_alg *aes_xts_simdalg_##suffix -+static int xctr_crypt_##suffix(struct skcipher_request *req) \ -+{ \ -+ return xctr_crypt(req, aes_xctr_crypt_##suffix); \ -+} \ -+ \ -+static struct skcipher_alg skcipher_algs_##suffix[] = {{ \ -+ .base.cra_name = "__xts(aes)", \ -+ .base.cra_driver_name = "__xts-aes-" driver_name_suffix, \ -+ .base.cra_priority = priority, \ -+ .base.cra_flags = CRYPTO_ALG_INTERNAL, \ -+ .base.cra_blocksize = AES_BLOCK_SIZE, \ -+ .base.cra_ctxsize = XTS_AES_CTX_SIZE, \ -+ .base.cra_module = THIS_MODULE, \ -+ .min_keysize = 2 * AES_MIN_KEY_SIZE, \ -+ .max_keysize = 2 * AES_MAX_KEY_SIZE, \ -+ .ivsize = AES_BLOCK_SIZE, \ -+ .walksize = 2 * AES_BLOCK_SIZE, \ 
-+ .setkey = xts_setkey_aesni, \ -+ .encrypt = xts_encrypt_##suffix, \ -+ .decrypt = xts_decrypt_##suffix, \ -+}, { \ -+ .base.cra_name = "__ctr(aes)", \ -+ .base.cra_driver_name = "__ctr-aes-" driver_name_suffix, \ -+ .base.cra_priority = priority, \ -+ .base.cra_flags = CRYPTO_ALG_INTERNAL, \ -+ .base.cra_blocksize = 1, \ -+ .base.cra_ctxsize = CRYPTO_AES_CTX_SIZE, \ -+ .base.cra_module = THIS_MODULE, \ -+ .min_keysize = AES_MIN_KEY_SIZE, \ -+ .max_keysize = AES_MAX_KEY_SIZE, \ -+ .ivsize = AES_BLOCK_SIZE, \ -+ .chunksize = AES_BLOCK_SIZE, \ -+ .setkey = aesni_skcipher_setkey, \ -+ .encrypt = ctr_crypt_##suffix, \ -+ .decrypt = ctr_crypt_##suffix, \ -+}, { \ -+ .base.cra_name = "__xctr(aes)", \ -+ .base.cra_driver_name = "__xctr-aes-" driver_name_suffix, \ -+ .base.cra_priority = priority, \ -+ .base.cra_flags = CRYPTO_ALG_INTERNAL, \ -+ .base.cra_blocksize = 1, \ -+ .base.cra_ctxsize = CRYPTO_AES_CTX_SIZE, \ -+ .base.cra_module = THIS_MODULE, \ -+ .min_keysize = AES_MIN_KEY_SIZE, \ -+ .max_keysize = AES_MAX_KEY_SIZE, \ -+ .ivsize = AES_BLOCK_SIZE, \ -+ .chunksize = AES_BLOCK_SIZE, \ -+ .setkey = aesni_skcipher_setkey, \ -+ .encrypt = xctr_crypt_##suffix, \ -+ .decrypt = xctr_crypt_##suffix, \ -+}}; \ -+ \ -+static struct simd_skcipher_alg * \ -+simd_skcipher_algs_##suffix[ARRAY_SIZE(skcipher_algs_##suffix)] - --DEFINE_XTS_ALG(aesni_avx, "xts-aes-aesni-avx", 500); -+DEFINE_AVX_SKCIPHER_ALGS(aesni_avx, "aesni-avx", 500); - #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) --DEFINE_XTS_ALG(vaes_avx2, "xts-aes-vaes-avx2", 600); --DEFINE_XTS_ALG(vaes_avx10_256, "xts-aes-vaes-avx10_256", 700); --DEFINE_XTS_ALG(vaes_avx10_512, "xts-aes-vaes-avx10_512", 800); -+DEFINE_AVX_SKCIPHER_ALGS(vaes_avx2, "vaes-avx2", 600); -+DEFINE_AVX_SKCIPHER_ALGS(vaes_avx10_256, "vaes-avx10_256", 700); -+DEFINE_AVX_SKCIPHER_ALGS(vaes_avx10_512, "vaes-avx10_512", 800); - #endif - - /* The common part of the x86_64 AES-GCM key struct */ -@@ -1552,8 +1577,9 @@ static int __init register_avx_algs(void - - if (!boot_cpu_has(X86_FEATURE_AVX)) - return 0; -- err = simd_register_skciphers_compat(&aes_xts_alg_aesni_avx, 1, -- &aes_xts_simdalg_aesni_avx); -+ err = simd_register_skciphers_compat(skcipher_algs_aesni_avx, -+ ARRAY_SIZE(skcipher_algs_aesni_avx), -+ simd_skcipher_algs_aesni_avx); - if (err) - return err; - err = simd_register_aeads_compat(aes_gcm_algs_aesni_avx, -@@ -1561,6 +1587,12 @@ static int __init register_avx_algs(void - aes_gcm_simdalgs_aesni_avx); - if (err) - return err; -+ /* -+ * Note: not all the algorithms registered below actually require -+ * VPCLMULQDQ. But in practice every CPU with VAES also has VPCLMULQDQ. -+ * Similarly, the assembler support was added at about the same time. -+ * For simplicity, just always check for VAES and VPCLMULQDQ together. 
-+ */ - #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) - if (!boot_cpu_has(X86_FEATURE_AVX2) || - !boot_cpu_has(X86_FEATURE_VAES) || -@@ -1568,8 +1600,9 @@ static int __init register_avx_algs(void - !boot_cpu_has(X86_FEATURE_PCLMULQDQ) || - !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) - return 0; -- err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx2, 1, -- &aes_xts_simdalg_vaes_avx2); -+ err = simd_register_skciphers_compat(skcipher_algs_vaes_avx2, -+ ARRAY_SIZE(skcipher_algs_vaes_avx2), -+ simd_skcipher_algs_vaes_avx2); - if (err) - return err; - -@@ -1580,8 +1613,9 @@ static int __init register_avx_algs(void - XFEATURE_MASK_AVX512, NULL)) - return 0; - -- err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_256, 1, -- &aes_xts_simdalg_vaes_avx10_256); -+ err = simd_register_skciphers_compat(skcipher_algs_vaes_avx10_256, -+ ARRAY_SIZE(skcipher_algs_vaes_avx10_256), -+ simd_skcipher_algs_vaes_avx10_256); - if (err) - return err; - err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_256, -@@ -1593,13 +1627,15 @@ static int __init register_avx_algs(void - if (x86_match_cpu(zmm_exclusion_list)) { - int i; - -- aes_xts_alg_vaes_avx10_512.base.cra_priority = 1; -+ for (i = 0; i < ARRAY_SIZE(skcipher_algs_vaes_avx10_512); i++) -+ skcipher_algs_vaes_avx10_512[i].base.cra_priority = 1; - for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512); i++) - aes_gcm_algs_vaes_avx10_512[i].base.cra_priority = 1; - } - -- err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_512, 1, -- &aes_xts_simdalg_vaes_avx10_512); -+ err = simd_register_skciphers_compat(skcipher_algs_vaes_avx10_512, -+ ARRAY_SIZE(skcipher_algs_vaes_avx10_512), -+ simd_skcipher_algs_vaes_avx10_512); - if (err) - return err; - err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_512, -@@ -1613,27 +1649,31 @@ static int __init register_avx_algs(void - - static void unregister_avx_algs(void) - { -- if (aes_xts_simdalg_aesni_avx) -- simd_unregister_skciphers(&aes_xts_alg_aesni_avx, 1, -- &aes_xts_simdalg_aesni_avx); -+ if (simd_skcipher_algs_aesni_avx[0]) -+ simd_unregister_skciphers(skcipher_algs_aesni_avx, -+ ARRAY_SIZE(skcipher_algs_aesni_avx), -+ simd_skcipher_algs_aesni_avx); - if (aes_gcm_simdalgs_aesni_avx[0]) - simd_unregister_aeads(aes_gcm_algs_aesni_avx, - ARRAY_SIZE(aes_gcm_algs_aesni_avx), - aes_gcm_simdalgs_aesni_avx); - #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) -- if (aes_xts_simdalg_vaes_avx2) -- simd_unregister_skciphers(&aes_xts_alg_vaes_avx2, 1, -- &aes_xts_simdalg_vaes_avx2); -- if (aes_xts_simdalg_vaes_avx10_256) -- simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_256, 1, -- &aes_xts_simdalg_vaes_avx10_256); -+ if (simd_skcipher_algs_vaes_avx2[0]) -+ simd_unregister_skciphers(skcipher_algs_vaes_avx2, -+ ARRAY_SIZE(skcipher_algs_vaes_avx2), -+ simd_skcipher_algs_vaes_avx2); -+ if (simd_skcipher_algs_vaes_avx10_256[0]) -+ simd_unregister_skciphers(skcipher_algs_vaes_avx10_256, -+ ARRAY_SIZE(skcipher_algs_vaes_avx10_256), -+ simd_skcipher_algs_vaes_avx10_256); - if (aes_gcm_simdalgs_vaes_avx10_256[0]) - simd_unregister_aeads(aes_gcm_algs_vaes_avx10_256, - ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256), - aes_gcm_simdalgs_vaes_avx10_256); -- if (aes_xts_simdalg_vaes_avx10_512) -- simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_512, 1, -- &aes_xts_simdalg_vaes_avx10_512); -+ if (simd_skcipher_algs_vaes_avx10_512[0]) -+ simd_unregister_skciphers(skcipher_algs_vaes_avx10_512, -+ ARRAY_SIZE(skcipher_algs_vaes_avx10_512), -+ 
simd_skcipher_algs_vaes_avx10_512); - if (aes_gcm_simdalgs_vaes_avx10_512[0]) - simd_unregister_aeads(aes_gcm_algs_vaes_avx10_512, - ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512), -@@ -1666,13 +1706,6 @@ static int __init aesni_init(void) - - if (!x86_match_cpu(aesni_cpu_id)) - return -ENODEV; --#ifdef CONFIG_X86_64 -- if (boot_cpu_has(X86_FEATURE_AVX)) { -- /* optimize performance of ctr mode encryption transform */ -- static_call_update(aesni_ctr_enc_tfm, aesni_ctr_enc_avx_tfm); -- pr_info("AES CTR mode by8 optimization enabled\n"); -- } --#endif /* CONFIG_X86_64 */ - - err = crypto_register_alg(&aesni_cipher_alg); - if (err) -@@ -1690,14 +1723,6 @@ static int __init aesni_init(void) - if (err) - goto unregister_skciphers; - --#ifdef CONFIG_X86_64 -- if (boot_cpu_has(X86_FEATURE_AVX)) -- err = simd_register_skciphers_compat(&aesni_xctr, 1, -- &aesni_simd_xctr); -- if (err) -- goto unregister_aeads; --#endif /* CONFIG_X86_64 */ -- - err = register_avx_algs(); - if (err) - goto unregister_avx; -@@ -1706,11 +1731,6 @@ static int __init aesni_init(void) - - unregister_avx: - unregister_avx_algs(); --#ifdef CONFIG_X86_64 -- if (aesni_simd_xctr) -- simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr); --unregister_aeads: --#endif /* CONFIG_X86_64 */ - simd_unregister_aeads(aes_gcm_algs_aesni, - ARRAY_SIZE(aes_gcm_algs_aesni), - aes_gcm_simdalgs_aesni); -@@ -1730,10 +1750,6 @@ static void __exit aesni_exit(void) - simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers), - aesni_simd_skciphers); - crypto_unregister_alg(&aesni_cipher_alg); --#ifdef CONFIG_X86_64 -- if (boot_cpu_has(X86_FEATURE_AVX)) -- simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr); --#endif /* CONFIG_X86_64 */ - unregister_avx_algs(); - } - diff --git a/debian/patches/patchset-pf/fixes/0001-Kunit-to-check-the-longest-symbol-length.patch b/debian/patches/patchset-pf/fixes/0001-Kunit-to-check-the-longest-symbol-length.patch deleted file mode 100644 index 7bb83f8..0000000 --- a/debian/patches/patchset-pf/fixes/0001-Kunit-to-check-the-longest-symbol-length.patch +++ /dev/null @@ -1,176 +0,0 @@ -From 4506de20739ac4726a258faa98609a552184d2d2 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Sergio=20Gonz=C3=A1lez=20Collado?= - -Date: Sun, 2 Mar 2025 23:15:18 +0100 -Subject: Kunit to check the longest symbol length -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The longest length of a symbol (KSYM_NAME_LEN) was increased to 512 -in the reference [1]. This patch adds kunit test suite to check the longest -symbol length. These tests verify that the longest symbol length defined -is supported. - -This test can also help other efforts for longer symbol length, -like [2]. - -The test suite defines one symbol with the longest possible length. - -The first test verify that functions with names of the created -symbol, can be called or not. - -The second test, verify that the symbols are created (or -not) in the kernel symbol table. 
- -[1] https://lore.kernel.org/lkml/20220802015052.10452-6-ojeda@kernel.org/ -[2] https://lore.kernel.org/lkml/20240605032120.3179157-1-song@kernel.org/ - -Tested-by: Martin Rodriguez Reboredo -Reviewed-by: Shuah Khan -Reviewed-by: Rae Moar -Signed-off-by: Sergio González Collado -Link: https://github.com/Rust-for-Linux/linux/issues/504 -Source: https://lore.kernel.org/rust-for-linux/20250302221518.76874-1-sergio.collado@gmail.com/ -Cherry-picked-for: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/63 ---- - arch/x86/tools/insn_decoder_test.c | 3 +- - lib/Kconfig.debug | 9 ++++ - lib/Makefile | 2 + - lib/longest_symbol_kunit.c | 82 ++++++++++++++++++++++++++++++ - 4 files changed, 95 insertions(+), 1 deletion(-) - create mode 100644 lib/longest_symbol_kunit.c - ---- a/arch/x86/tools/insn_decoder_test.c -+++ b/arch/x86/tools/insn_decoder_test.c -@@ -10,6 +10,7 @@ - #include - #include - #include -+#include - - #define unlikely(cond) (cond) - -@@ -106,7 +107,7 @@ static void parse_args(int argc, char ** - } - } - --#define BUFSIZE 256 -+#define BUFSIZE (256 + KSYM_NAME_LEN) - - int main(int argc, char **argv) - { ---- a/lib/Kconfig.debug -+++ b/lib/Kconfig.debug -@@ -2838,6 +2838,15 @@ config FORTIFY_KUNIT_TEST - by the str*() and mem*() family of functions. For testing runtime - traps of FORTIFY_SOURCE, see LKDTM's "FORTIFY_*" tests. - -+config LONGEST_SYM_KUNIT_TEST -+ tristate "Test the longest symbol possible" if !KUNIT_ALL_TESTS -+ depends on KUNIT && KPROBES -+ default KUNIT_ALL_TESTS -+ help -+ Tests the longest symbol possible -+ -+ If unsure, say N. -+ - config HW_BREAKPOINT_KUNIT_TEST - bool "Test hw_breakpoint constraints accounting" if !KUNIT_ALL_TESTS - depends on HAVE_HW_BREAKPOINT ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -398,6 +398,8 @@ obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fort - obj-$(CONFIG_CRC_KUNIT_TEST) += crc_kunit.o - obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o - obj-$(CONFIG_USERCOPY_KUNIT_TEST) += usercopy_kunit.o -+obj-$(CONFIG_LONGEST_SYM_KUNIT_TEST) += longest_symbol_kunit.o -+CFLAGS_longest_symbol_kunit.o += $(call cc-disable-warning, missing-prototypes) - - obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o - ---- /dev/null -+++ b/lib/longest_symbol_kunit.c -@@ -0,0 +1,82 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Test the longest symbol length. Execute with: -+ * ./tools/testing/kunit/kunit.py run longest-symbol -+ * --arch=x86_64 --kconfig_add CONFIG_KPROBES=y --kconfig_add CONFIG_MODULES=y -+ * --kconfig_add CONFIG_RETPOLINE=n --kconfig_add CONFIG_CFI_CLANG=n -+ * --kconfig_add CONFIG_MITIGATION_RETPOLINE=n -+ */ -+ -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+#include -+#include -+#include -+ -+#define DI(name) s##name##name -+#define DDI(name) DI(n##name##name) -+#define DDDI(name) DDI(n##name##name) -+#define DDDDI(name) DDDI(n##name##name) -+#define DDDDDI(name) DDDDI(n##name##name) -+ -+/*Generate a symbol whose name length is 511 */ -+#define LONGEST_SYM_NAME DDDDDI(g1h2i3j4k5l6m7n) -+ -+#define RETURN_LONGEST_SYM 0xAAAAA -+ -+noinline int LONGEST_SYM_NAME(void); -+noinline int LONGEST_SYM_NAME(void) -+{ -+ return RETURN_LONGEST_SYM; -+} -+ -+_Static_assert(sizeof(__stringify(LONGEST_SYM_NAME)) == KSYM_NAME_LEN, -+"Incorrect symbol length found. 
Expected KSYM_NAME_LEN: " -+__stringify(KSYM_NAME_LEN) ", but found: " -+__stringify(sizeof(LONGEST_SYM_NAME))); -+ -+static void test_longest_symbol(struct kunit *test) -+{ -+ KUNIT_EXPECT_EQ(test, RETURN_LONGEST_SYM, LONGEST_SYM_NAME()); -+}; -+ -+static void test_longest_symbol_kallsyms(struct kunit *test) -+{ -+ unsigned long (*kallsyms_lookup_name)(const char *name); -+ static int (*longest_sym)(void); -+ -+ struct kprobe kp = { -+ .symbol_name = "kallsyms_lookup_name", -+ }; -+ -+ if (register_kprobe(&kp) < 0) { -+ pr_info("%s: kprobe not registered", __func__); -+ KUNIT_FAIL(test, "test_longest_symbol kallsyms: kprobe not registered\n"); -+ return; -+ } -+ -+ kunit_warn(test, "test_longest_symbol kallsyms: kprobe registered\n"); -+ kallsyms_lookup_name = (unsigned long (*)(const char *name))kp.addr; -+ unregister_kprobe(&kp); -+ -+ longest_sym = -+ (void *) kallsyms_lookup_name(__stringify(LONGEST_SYM_NAME)); -+ KUNIT_EXPECT_EQ(test, RETURN_LONGEST_SYM, longest_sym()); -+}; -+ -+static struct kunit_case longest_symbol_test_cases[] = { -+ KUNIT_CASE(test_longest_symbol), -+ KUNIT_CASE(test_longest_symbol_kallsyms), -+ {} -+}; -+ -+static struct kunit_suite longest_symbol_test_suite = { -+ .name = "longest-symbol", -+ .test_cases = longest_symbol_test_cases, -+}; -+kunit_test_suite(longest_symbol_test_suite); -+ -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("Test the longest symbol length"); -+MODULE_AUTHOR("Sergio González Collado"); diff --git a/debian/patches/patchset-pf/fixes/0001-mm-fix-ratelimit_pages-update-error-in-dirty_ratio_h.patch b/debian/patches/patchset-pf/fixes/0001-mm-fix-ratelimit_pages-update-error-in-dirty_ratio_h.patch new file mode 100644 index 0000000..a1ccdca --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0001-mm-fix-ratelimit_pages-update-error-in-dirty_ratio_h.patch @@ -0,0 +1,70 @@ +From cda8b1022f32bb7a917148f75f4641e7a5b3e729 Mon Sep 17 00:00:00 2001 +From: Jinliang Zheng +Date: Tue, 15 Apr 2025 17:02:32 +0800 +Subject: mm: fix ratelimit_pages update error in dirty_ratio_handler() + +In dirty_ratio_handler(), vm_dirty_bytes must be set to zero before +calling writeback_set_ratelimit(), as global_dirty_limits() always +prioritizes the value of vm_dirty_bytes. + +It's domain_dirty_limits() that's relevant here, not node_dirty_ok: + + dirty_ratio_handler + writeback_set_ratelimit + global_dirty_limits(&dirty_thresh) <- ratelimit_pages based on dirty_thresh + domain_dirty_limits + if (bytes) <- bytes = vm_dirty_bytes <--------+ + thresh = f1(bytes) <- prioritizes vm_dirty_bytes | + else | + thresh = f2(ratio) | + ratelimit_pages = f3(dirty_thresh) | + vm_dirty_bytes = 0 <- it's late! ---------------------+ + +This causes ratelimit_pages to still use the value calculated based on +vm_dirty_bytes, which is wrong now. + + +The impact visible to userspace is difficult to capture directly because +there is no procfs/sysfs interface exported to user space. However, it +will have a real impact on the balance of dirty pages. + +For example: + +1. On default, we have vm_dirty_ratio=40, vm_dirty_bytes=0 + +2. echo 8192 > dirty_bytes, then vm_dirty_bytes=8192, + vm_dirty_ratio=0, and ratelimit_pages is calculated based on + vm_dirty_bytes now. + +3. echo 20 > dirty_ratio, then since vm_dirty_bytes is not reset to + zero when writeback_set_ratelimit() -> global_dirty_limits() -> + domain_dirty_limits() is called, reallimit_pages is still calculated + based on vm_dirty_bytes instead of vm_dirty_ratio. This does not + conform to the actual intent of the user. 
+ +Link: https://lkml.kernel.org/r/20250415090232.7544-1-alexjlzheng@tencent.com +Fixes: 9d823e8f6b1b ("writeback: per task dirty rate limit") +Signed-off-by: Jinliang Zheng +Reviewed-by: MengEn Sun +Cc: Andrea Righi +Cc: Fenggaung Wu +Cc: Jinliang Zheng +Cc: Matthew Wilcox (Oracle) +Cc: +Signed-off-by: Andrew Morton +--- + mm/page-writeback.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -520,8 +520,8 @@ static int dirty_ratio_handler(const str + + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (ret == 0 && write && vm_dirty_ratio != old_ratio) { +- writeback_set_ratelimit(); + vm_dirty_bytes = 0; ++ writeback_set_ratelimit(); + } + return ret; + } diff --git a/debian/patches/patchset-pf/fixes/0002-vgacon-Add-check-for-vc_origin-address-range-in-vgac.patch b/debian/patches/patchset-pf/fixes/0002-vgacon-Add-check-for-vc_origin-address-range-in-vgac.patch new file mode 100644 index 0000000..23085db --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0002-vgacon-Add-check-for-vc_origin-address-range-in-vgac.patch @@ -0,0 +1,179 @@ +From 30a724581b5037176f6492359c189ebb180ccf1f Mon Sep 17 00:00:00 2001 +From: GONG Ruiqi +Date: Sun, 27 Apr 2025 10:53:03 +0800 +Subject: vgacon: Add check for vc_origin address range in vgacon_scroll() + +Our in-house Syzkaller reported the following BUG (twice), which we +believed was the same issue with [1]: + +================================================================== +BUG: KASAN: slab-out-of-bounds in vcs_scr_readw+0xc2/0xd0 drivers/tty/vt/vt.c:4740 +Read of size 2 at addr ffff88800f5bef60 by task syz.7.2620/12393 +... +Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0x72/0xa0 lib/dump_stack.c:106 + print_address_description.constprop.0+0x6b/0x3d0 mm/kasan/report.c:364 + print_report+0xba/0x280 mm/kasan/report.c:475 + kasan_report+0xa9/0xe0 mm/kasan/report.c:588 + vcs_scr_readw+0xc2/0xd0 drivers/tty/vt/vt.c:4740 + vcs_write_buf_noattr drivers/tty/vt/vc_screen.c:493 [inline] + vcs_write+0x586/0x840 drivers/tty/vt/vc_screen.c:690 + vfs_write+0x219/0x960 fs/read_write.c:584 + ksys_write+0x12e/0x260 fs/read_write.c:639 + do_syscall_x64 arch/x86/entry/common.c:51 [inline] + do_syscall_64+0x59/0x110 arch/x86/entry/common.c:81 + entry_SYSCALL_64_after_hwframe+0x78/0xe2 + ... 
+ + +Allocated by task 5614: + kasan_save_stack+0x20/0x40 mm/kasan/common.c:45 + kasan_set_track+0x25/0x30 mm/kasan/common.c:52 + ____kasan_kmalloc mm/kasan/common.c:374 [inline] + __kasan_kmalloc+0x8f/0xa0 mm/kasan/common.c:383 + kasan_kmalloc include/linux/kasan.h:201 [inline] + __do_kmalloc_node mm/slab_common.c:1007 [inline] + __kmalloc+0x62/0x140 mm/slab_common.c:1020 + kmalloc include/linux/slab.h:604 [inline] + kzalloc include/linux/slab.h:721 [inline] + vc_do_resize+0x235/0xf40 drivers/tty/vt/vt.c:1193 + vgacon_adjust_height+0x2d4/0x350 drivers/video/console/vgacon.c:1007 + vgacon_font_set+0x1f7/0x240 drivers/video/console/vgacon.c:1031 + con_font_set drivers/tty/vt/vt.c:4628 [inline] + con_font_op+0x4da/0xa20 drivers/tty/vt/vt.c:4675 + vt_k_ioctl+0xa10/0xb30 drivers/tty/vt/vt_ioctl.c:474 + vt_ioctl+0x14c/0x1870 drivers/tty/vt/vt_ioctl.c:752 + tty_ioctl+0x655/0x1510 drivers/tty/tty_io.c:2779 + vfs_ioctl fs/ioctl.c:51 [inline] + __do_sys_ioctl fs/ioctl.c:871 [inline] + __se_sys_ioctl+0x12d/0x190 fs/ioctl.c:857 + do_syscall_x64 arch/x86/entry/common.c:51 [inline] + do_syscall_64+0x59/0x110 arch/x86/entry/common.c:81 + entry_SYSCALL_64_after_hwframe+0x78/0xe2 + +Last potentially related work creation: + kasan_save_stack+0x20/0x40 mm/kasan/common.c:45 + __kasan_record_aux_stack+0x94/0xa0 mm/kasan/generic.c:492 + __call_rcu_common.constprop.0+0xc3/0xa10 kernel/rcu/tree.c:2713 + netlink_release+0x620/0xc20 net/netlink/af_netlink.c:802 + __sock_release+0xb5/0x270 net/socket.c:663 + sock_close+0x1e/0x30 net/socket.c:1425 + __fput+0x408/0xab0 fs/file_table.c:384 + __fput_sync+0x4c/0x60 fs/file_table.c:465 + __do_sys_close fs/open.c:1580 [inline] + __se_sys_close+0x68/0xd0 fs/open.c:1565 + do_syscall_x64 arch/x86/entry/common.c:51 [inline] + do_syscall_64+0x59/0x110 arch/x86/entry/common.c:81 + entry_SYSCALL_64_after_hwframe+0x78/0xe2 + +Second to last potentially related work creation: + kasan_save_stack+0x20/0x40 mm/kasan/common.c:45 + __kasan_record_aux_stack+0x94/0xa0 mm/kasan/generic.c:492 + __call_rcu_common.constprop.0+0xc3/0xa10 kernel/rcu/tree.c:2713 + netlink_release+0x620/0xc20 net/netlink/af_netlink.c:802 + __sock_release+0xb5/0x270 net/socket.c:663 + sock_close+0x1e/0x30 net/socket.c:1425 + __fput+0x408/0xab0 fs/file_table.c:384 + task_work_run+0x154/0x240 kernel/task_work.c:239 + exit_task_work include/linux/task_work.h:45 [inline] + do_exit+0x8e5/0x1320 kernel/exit.c:874 + do_group_exit+0xcd/0x280 kernel/exit.c:1023 + get_signal+0x1675/0x1850 kernel/signal.c:2905 + arch_do_signal_or_restart+0x80/0x3b0 arch/x86/kernel/signal.c:310 + exit_to_user_mode_loop kernel/entry/common.c:111 [inline] + exit_to_user_mode_prepare include/linux/entry-common.h:328 [inline] + __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline] + syscall_exit_to_user_mode+0x1b3/0x1e0 kernel/entry/common.c:218 + do_syscall_64+0x66/0x110 arch/x86/entry/common.c:87 + entry_SYSCALL_64_after_hwframe+0x78/0xe2 + +The buggy address belongs to the object at ffff88800f5be000 + which belongs to the cache kmalloc-2k of size 2048 +The buggy address is located 2656 bytes to the right of + allocated 1280-byte region [ffff88800f5be000, ffff88800f5be500) + +... 
+ +Memory state around the buggy address: + ffff88800f5bee00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff88800f5bee80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +>ffff88800f5bef00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ^ + ffff88800f5bef80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff88800f5bf000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +================================================================== + +By analyzing the vmcore, we found that vc->vc_origin was somehow placed +one line prior to vc->vc_screenbuf when vc was in KD_TEXT mode, and +further writings to /dev/vcs caused out-of-bounds reads (and writes +right after) in vcs_write_buf_noattr(). + +Our further experiments show that in most cases, vc->vc_origin equals to +vga_vram_base when the console is in KD_TEXT mode, and it's around +vc->vc_screenbuf for the KD_GRAPHICS mode. But via triggerring a +TIOCL_SETVESABLANK ioctl beforehand, we can make vc->vc_origin be around +vc->vc_screenbuf while the console is in KD_TEXT mode, and then by +writing the special 'ESC M' control sequence to the tty certain times +(depends on the value of `vc->state.y - vc->vc_top`), we can eventually +move vc->vc_origin prior to vc->vc_screenbuf. Here's the PoC, tested on +QEMU: + +``` +int main() { + const int RI_NUM = 10; // should be greater than `vc->state.y - vc->vc_top` + int tty_fd, vcs_fd; + const char *tty_path = "/dev/tty0"; + const char *vcs_path = "/dev/vcs"; + const char escape_seq[] = "\x1bM"; // ESC + M + const char trigger_seq[] = "Let's trigger an OOB write."; + struct vt_sizes vt_size = { 70, 2 }; + int blank = TIOCL_BLANKSCREEN; + + tty_fd = open(tty_path, O_RDWR); + + char vesa_mode[] = { TIOCL_SETVESABLANK, 1 }; + ioctl(tty_fd, TIOCLINUX, vesa_mode); + + ioctl(tty_fd, TIOCLINUX, &blank); + ioctl(tty_fd, VT_RESIZE, &vt_size); + + for (int i = 0; i < RI_NUM; ++i) + write(tty_fd, escape_seq, sizeof(escape_seq) - 1); + + vcs_fd = open(vcs_path, O_RDWR); + write(vcs_fd, trigger_seq, sizeof(trigger_seq)); + + close(vcs_fd); + close(tty_fd); + return 0; +} +``` + +To solve this problem, add an address range validation check in +vgacon_scroll(), ensuring vc->vc_origin never precedes vc_screenbuf. 
+ +Reported-by: syzbot+9c09fda97a1a65ea859b@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=9c09fda97a1a65ea859b [1] +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Cc: stable@vger.kernel.org +Co-developed-by: Yi Yang +Signed-off-by: Yi Yang +Signed-off-by: GONG Ruiqi +Signed-off-by: Helge Deller +--- + drivers/video/console/vgacon.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/video/console/vgacon.c ++++ b/drivers/video/console/vgacon.c +@@ -1168,7 +1168,7 @@ static bool vgacon_scroll(struct vc_data + c->vc_screenbuf_size - delta); + c->vc_origin = vga_vram_end - c->vc_screenbuf_size; + vga_rolled_over = 0; +- } else ++ } else if (oldo - delta >= (unsigned long)c->vc_screenbuf) + c->vc_origin -= delta; + c->vc_scr_end = c->vc_origin + c->vc_screenbuf_size; + scr_memsetw((u16 *) (c->vc_origin), c->vc_video_erase_char, diff --git a/debian/patches/patchset-pf/fixes/0002-x86-tools-Drop-duplicate-unlikely-definition-in-insn.patch b/debian/patches/patchset-pf/fixes/0002-x86-tools-Drop-duplicate-unlikely-definition-in-insn.patch deleted file mode 100644 index c9e77cf..0000000 --- a/debian/patches/patchset-pf/fixes/0002-x86-tools-Drop-duplicate-unlikely-definition-in-insn.patch +++ /dev/null @@ -1,36 +0,0 @@ -From b5a4b82efd19d0687a5582a58f6830bf714e34fc Mon Sep 17 00:00:00 2001 -From: Nathan Chancellor -Date: Tue, 18 Mar 2025 15:32:30 -0700 -Subject: x86/tools: Drop duplicate unlikely() definition in - insn_decoder_test.c - -After commit c104c16073b7 ("Kunit to check the longest symbol length"), -there is a warning when building with clang because there is now a -definition of unlikely from compiler.h in tools/include/linux, which -conflicts with the one in the instruction decoder selftest: - - arch/x86/tools/insn_decoder_test.c:15:9: warning: 'unlikely' macro redefined [-Wmacro-redefined] - -Remove the second unlikely() definition, as it is no longer necessary, -clearing up the warning. - -Fixes: c104c16073b7 ("Kunit to check the longest symbol length") -Signed-off-by: Nathan Chancellor -Signed-off-by: Ingo Molnar -Acked-by: Shuah Khan -Link: https://lore.kernel.org/r/20250318-x86-decoder-test-fix-unlikely-redef-v1-1-74c84a7bf05b@kernel.org ---- - arch/x86/tools/insn_decoder_test.c | 2 -- - 1 file changed, 2 deletions(-) - ---- a/arch/x86/tools/insn_decoder_test.c -+++ b/arch/x86/tools/insn_decoder_test.c -@@ -12,8 +12,6 @@ - #include - #include - --#define unlikely(cond) (cond) -- - #include - #include - #include diff --git a/debian/patches/patchset-pf/fixes/0003-fbdev-Fix-do_register_framebuffer-to-prevent-null-pt.patch b/debian/patches/patchset-pf/fixes/0003-fbdev-Fix-do_register_framebuffer-to-prevent-null-pt.patch new file mode 100644 index 0000000..b55ba65 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0003-fbdev-Fix-do_register_framebuffer-to-prevent-null-pt.patch @@ -0,0 +1,102 @@ +From 5cf26cf9fd9c11cb1543aac026f8928829895663 Mon Sep 17 00:00:00 2001 +From: Murad Masimov +Date: Mon, 28 Apr 2025 18:34:06 +0300 +Subject: fbdev: Fix do_register_framebuffer to prevent null-ptr-deref in + fb_videomode_to_var + +If fb_add_videomode() in do_register_framebuffer() fails to allocate +memory for fb_videomode, it will later lead to a null-ptr dereference in +fb_videomode_to_var(), as the fb_info is registered while not having the +mode in modelist that is expected to be there, i.e. the one that is +described in fb_info->var. 
+ +================================================================ +general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN NOPTI +KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] +CPU: 1 PID: 30371 Comm: syz-executor.1 Not tainted 5.10.226-syzkaller #0 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 +RIP: 0010:fb_videomode_to_var+0x24/0x610 drivers/video/fbdev/core/modedb.c:901 +Call Trace: + display_to_var+0x3a/0x7c0 drivers/video/fbdev/core/fbcon.c:929 + fbcon_resize+0x3e2/0x8f0 drivers/video/fbdev/core/fbcon.c:2071 + resize_screen drivers/tty/vt/vt.c:1176 [inline] + vc_do_resize+0x53a/0x1170 drivers/tty/vt/vt.c:1263 + fbcon_modechanged+0x3ac/0x6e0 drivers/video/fbdev/core/fbcon.c:2720 + fbcon_update_vcs+0x43/0x60 drivers/video/fbdev/core/fbcon.c:2776 + do_fb_ioctl+0x6d2/0x740 drivers/video/fbdev/core/fbmem.c:1128 + fb_ioctl+0xe7/0x150 drivers/video/fbdev/core/fbmem.c:1203 + vfs_ioctl fs/ioctl.c:48 [inline] + __do_sys_ioctl fs/ioctl.c:753 [inline] + __se_sys_ioctl fs/ioctl.c:739 [inline] + __x64_sys_ioctl+0x19a/0x210 fs/ioctl.c:739 + do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46 + entry_SYSCALL_64_after_hwframe+0x67/0xd1 +================================================================ + +Even though fbcon_init() checks beforehand if fb_match_mode() in +var_to_display() fails, it can not prevent the panic because fbcon_init() +does not return error code. Considering this and the comment in the code +about fb_match_mode() returning NULL - "This should not happen" - it is +better to prevent registering the fb_info if its mode was not set +successfully. Also move fb_add_videomode() closer to the beginning of +do_register_framebuffer() to avoid having to do the cleanup on fail. + +Found by Linux Verification Center (linuxtesting.org) with Syzkaller. 
+ +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Cc: stable@vger.kernel.org +Signed-off-by: Murad Masimov +Signed-off-by: Helge Deller +--- + drivers/video/fbdev/core/fbmem.c | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +--- a/drivers/video/fbdev/core/fbmem.c ++++ b/drivers/video/fbdev/core/fbmem.c +@@ -388,7 +388,7 @@ static int fb_check_foreignness(struct f + + static int do_register_framebuffer(struct fb_info *fb_info) + { +- int i; ++ int i, err = 0; + struct fb_videomode mode; + + if (fb_check_foreignness(fb_info)) +@@ -397,10 +397,18 @@ static int do_register_framebuffer(struc + if (num_registered_fb == FB_MAX) + return -ENXIO; + +- num_registered_fb++; + for (i = 0 ; i < FB_MAX; i++) + if (!registered_fb[i]) + break; ++ ++ if (!fb_info->modelist.prev || !fb_info->modelist.next) ++ INIT_LIST_HEAD(&fb_info->modelist); ++ ++ fb_var_to_videomode(&mode, &fb_info->var); ++ err = fb_add_videomode(&mode, &fb_info->modelist); ++ if (err < 0) ++ return err; ++ + fb_info->node = i; + refcount_set(&fb_info->count, 1); + mutex_init(&fb_info->lock); +@@ -426,16 +434,12 @@ static int do_register_framebuffer(struc + if (bitmap_empty(fb_info->pixmap.blit_y, FB_MAX_BLIT_HEIGHT)) + bitmap_fill(fb_info->pixmap.blit_y, FB_MAX_BLIT_HEIGHT); + +- if (!fb_info->modelist.prev || !fb_info->modelist.next) +- INIT_LIST_HEAD(&fb_info->modelist); +- + if (fb_info->skip_vt_switch) + pm_vt_switch_required(fb_info->device, false); + else + pm_vt_switch_required(fb_info->device, true); + +- fb_var_to_videomode(&mode, &fb_info->var); +- fb_add_videomode(&mode, &fb_info->modelist); ++ num_registered_fb++; + registered_fb[i] = fb_info; + + #ifdef CONFIG_GUMSTIX_AM200EPD diff --git a/debian/patches/patchset-pf/fixes/0004-fbdev-Fix-fb_set_var-to-prevent-null-ptr-deref-in-fb.patch b/debian/patches/patchset-pf/fixes/0004-fbdev-Fix-fb_set_var-to-prevent-null-ptr-deref-in-fb.patch new file mode 100644 index 0000000..4d1505f --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0004-fbdev-Fix-fb_set_var-to-prevent-null-ptr-deref-in-fb.patch @@ -0,0 +1,65 @@ +From 54c7f478f1a9d58f5609a48d461c7d495bb8301a Mon Sep 17 00:00:00 2001 +From: Murad Masimov +Date: Mon, 28 Apr 2025 18:34:07 +0300 +Subject: fbdev: Fix fb_set_var to prevent null-ptr-deref in + fb_videomode_to_var + +If fb_add_videomode() in fb_set_var() fails to allocate memory for +fb_videomode, later it may lead to a null-ptr dereference in +fb_videomode_to_var(), as the fb_info is registered while not having the +mode in modelist that is expected to be there, i.e. the one that is +described in fb_info->var. 
+ +================================================================ +general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN NOPTI +KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] +CPU: 1 PID: 30371 Comm: syz-executor.1 Not tainted 5.10.226-syzkaller #0 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 +RIP: 0010:fb_videomode_to_var+0x24/0x610 drivers/video/fbdev/core/modedb.c:901 +Call Trace: + display_to_var+0x3a/0x7c0 drivers/video/fbdev/core/fbcon.c:929 + fbcon_resize+0x3e2/0x8f0 drivers/video/fbdev/core/fbcon.c:2071 + resize_screen drivers/tty/vt/vt.c:1176 [inline] + vc_do_resize+0x53a/0x1170 drivers/tty/vt/vt.c:1263 + fbcon_modechanged+0x3ac/0x6e0 drivers/video/fbdev/core/fbcon.c:2720 + fbcon_update_vcs+0x43/0x60 drivers/video/fbdev/core/fbcon.c:2776 + do_fb_ioctl+0x6d2/0x740 drivers/video/fbdev/core/fbmem.c:1128 + fb_ioctl+0xe7/0x150 drivers/video/fbdev/core/fbmem.c:1203 + vfs_ioctl fs/ioctl.c:48 [inline] + __do_sys_ioctl fs/ioctl.c:753 [inline] + __se_sys_ioctl fs/ioctl.c:739 [inline] + __x64_sys_ioctl+0x19a/0x210 fs/ioctl.c:739 + do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46 + entry_SYSCALL_64_after_hwframe+0x67/0xd1 +================================================================ + +The reason is that fb_info->var is being modified in fb_set_var(), and +then fb_videomode_to_var() is called. If it fails to add the mode to +fb_info->modelist, fb_set_var() returns error, but does not restore the +old value of fb_info->var. Restore fb_info->var on failure the same way +it is done earlier in the function. + +Found by Linux Verification Center (linuxtesting.org) with Syzkaller. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Cc: stable@vger.kernel.org +Signed-off-by: Murad Masimov +Signed-off-by: Helge Deller +--- + drivers/video/fbdev/core/fbmem.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/video/fbdev/core/fbmem.c ++++ b/drivers/video/fbdev/core/fbmem.c +@@ -328,8 +328,10 @@ fb_set_var(struct fb_info *info, struct + !list_empty(&info->modelist)) + ret = fb_add_videomode(&mode, &info->modelist); + +- if (ret) ++ if (ret) { ++ info->var = old_var; + return ret; ++ } + + event.info = info; + event.data = &mode; diff --git a/debian/patches/patchset-pf/fixes/0004-wifi-ath12k-Abort-scan-before-removing-link-interfac.patch b/debian/patches/patchset-pf/fixes/0004-wifi-ath12k-Abort-scan-before-removing-link-interfac.patch deleted file mode 100644 index 7aae981..0000000 --- a/debian/patches/patchset-pf/fixes/0004-wifi-ath12k-Abort-scan-before-removing-link-interfac.patch +++ /dev/null @@ -1,40 +0,0 @@ -From e56acee381a8e07edf1920fb58f3166f911b6e5c Mon Sep 17 00:00:00 2001 -From: Lingbo Kong -Date: Wed, 26 Feb 2025 19:31:18 +0800 -Subject: wifi: ath12k: Abort scan before removing link interface to prevent - duplicate deletion - -Currently, when ath12k performs the remove link interface operation, if -there is an ongoing scan operation on the arvif, ath12k may execute the -remove link interface operation multiple times on the same arvif. This -occurs because, during the remove link operation, if a scan operation is -present on the arvif, ath12k may receive a WMI_SCAN_EVENT_COMPLETED event -from the firmware. Upon receiving this event, ath12k will continue to -execute the ath12k_scan_vdev_clean_work() function, performing the remove -link interface operation on the same arvif again. 
- -To address this issue, before executing the remove link interface -operation, ath12k needs to check if there is an ongoing scan operation on -the current arvif. If such an operation exists, it should be aborted. - -Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 - -Signed-off-by: Lingbo Kong ---- - drivers/net/wireless/ath/ath12k/mac.c | 5 +++++ - 1 file changed, 5 insertions(+) - ---- a/drivers/net/wireless/ath/ath12k/mac.c -+++ b/drivers/net/wireless/ath/ath12k/mac.c -@@ -9395,6 +9395,11 @@ ath12k_mac_op_unassign_vif_chanctx(struc - ar->num_started_vdevs == 1 && ar->monitor_vdev_created) - ath12k_mac_monitor_stop(ar); - -+ if (ar->scan.arvif == arvif && ar->scan.state == ATH12K_SCAN_RUNNING) { -+ ath12k_scan_abort(ar); -+ ar->scan.arvif = NULL; -+ } -+ - ath12k_mac_remove_link_interface(hw, arvif); - ath12k_mac_unassign_link_vif(arvif); - } diff --git a/debian/patches/patchset-pf/fixes/0005-Kconfig-switch-CONFIG_SYSFS_SYCALL-default-to-n.patch b/debian/patches/patchset-pf/fixes/0005-Kconfig-switch-CONFIG_SYSFS_SYCALL-default-to-n.patch deleted file mode 100644 index 03e3c63..0000000 --- a/debian/patches/patchset-pf/fixes/0005-Kconfig-switch-CONFIG_SYSFS_SYCALL-default-to-n.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 8d0e02f81d08c7b1e082028af0f55a22e7e1dfb2 Mon Sep 17 00:00:00 2001 -From: Christian Brauner -Date: Tue, 15 Apr 2025 10:22:04 +0200 -Subject: Kconfig: switch CONFIG_SYSFS_SYCALL default to n - -This odd system call will be removed in the future. Let's decouple it -from CONFIG_EXPERT and switch the default to n as a first step. - -Signed-off-by: Christian Brauner ---- - init/Kconfig | 20 ++++++++++---------- - 1 file changed, 10 insertions(+), 10 deletions(-) - ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1603,6 +1603,16 @@ config SYSCTL_ARCH_UNALIGN_ALLOW - the unaligned access emulation. - see arch/parisc/kernel/unaligned.c for reference - -+config SYSFS_SYSCALL -+ bool "Sysfs syscall support" -+ default n -+ help -+ sys_sysfs is an obsolete system call no longer supported in libc. -+ Note that disabling this option is more secure but might break -+ compatibility with some systems. -+ -+ If unsure say N here. -+ - config HAVE_PCSPKR_PLATFORM - bool - -@@ -1647,16 +1657,6 @@ config SGETMASK_SYSCALL - - If unsure, leave the default option here. - --config SYSFS_SYSCALL -- bool "Sysfs syscall support" if EXPERT -- default y -- help -- sys_sysfs is an obsolete system call no longer supported in libc. -- Note that disabling this option is more secure but might break -- compatibility with some systems. -- -- If unsure say Y here. -- - config FHANDLE - bool "open by fhandle syscalls" if EXPERT - select EXPORTFS diff --git a/debian/patches/patchset-pf/fixes/0005-anon_inode-use-a-proper-mode-internally.patch b/debian/patches/patchset-pf/fixes/0005-anon_inode-use-a-proper-mode-internally.patch new file mode 100644 index 0000000..403fbdf --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0005-anon_inode-use-a-proper-mode-internally.patch @@ -0,0 +1,113 @@ +From 9cb2f9d210f915aabe54c5061d84f3fbe93c71ea Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Mon, 7 Apr 2025 11:54:15 +0200 +Subject: anon_inode: use a proper mode internally + +This allows the VFS to not trip over anonymous inodes and we can add +asserts based on the mode into the vfs. When we report it to userspace +we can simply hide the mode to avoid regressions. 
I've audited all +direct callers of alloc_anon_inode() and only secretmen overrides i_mode +and i_op inode operations but it already uses a regular file. + +Link: https://lore.kernel.org/20250407-work-anon_inode-v1-1-53a44c20d44e@kernel.org +Fixes: af153bb63a336 ("vfs: catch invalid modes in may_open()") +Reviewed-by: Jeff Layton +Cc: stable@vger.kernel.org # all LTS kernels +Reported-by: syzbot+5d8e79d323a13aa0b248@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/all/67ed3fb3.050a0220.14623d.0009.GAE@google.com +Signed-off-by: Christian Brauner +--- + fs/anon_inodes.c | 36 ++++++++++++++++++++++++++++++++++++ + fs/internal.h | 3 +++ + fs/libfs.c | 8 +++++++- + 3 files changed, 46 insertions(+), 1 deletion(-) + +--- a/fs/anon_inodes.c ++++ b/fs/anon_inodes.c +@@ -24,10 +24,44 @@ + + #include + ++#include "internal.h" ++ + static struct vfsmount *anon_inode_mnt __ro_after_init; + static struct inode *anon_inode_inode __ro_after_init; + + /* ++ * User space expects anonymous inodes to have no file type in st_mode. ++ * ++ * In particular, 'lsof' has this legacy logic: ++ * ++ * type = s->st_mode & S_IFMT; ++ * switch (type) { ++ * ... ++ * case 0: ++ * if (!strcmp(p, "anon_inode")) ++ * Lf->ntype = Ntype = N_ANON_INODE; ++ * ++ * to detect our old anon_inode logic. ++ * ++ * Rather than mess with our internal sane inode data, just fix it ++ * up here in getattr() by masking off the format bits. ++ */ ++int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path, ++ struct kstat *stat, u32 request_mask, ++ unsigned int query_flags) ++{ ++ struct inode *inode = d_inode(path->dentry); ++ ++ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); ++ stat->mode &= ~S_IFMT; ++ return 0; ++} ++ ++static const struct inode_operations anon_inode_operations = { ++ .getattr = anon_inode_getattr, ++}; ++ ++/* + * anon_inodefs_dname() is called from d_path(). + */ + static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen) +@@ -66,6 +100,7 @@ static struct inode *anon_inode_make_sec + if (IS_ERR(inode)) + return inode; + inode->i_flags &= ~S_PRIVATE; ++ inode->i_op = &anon_inode_operations; + error = security_inode_init_security_anon(inode, &QSTR(name), + context_inode); + if (error) { +@@ -313,6 +348,7 @@ static int __init anon_inode_init(void) + anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb); + if (IS_ERR(anon_inode_inode)) + panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode)); ++ anon_inode_inode->i_op = &anon_inode_operations; + + return 0; + } +--- a/fs/internal.h ++++ b/fs/internal.h +@@ -343,3 +343,6 @@ static inline bool path_mounted(const st + void file_f_owner_release(struct file *file); + bool file_seek_cur_needs_f_lock(struct file *file); + int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map); ++int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path, ++ struct kstat *stat, u32 request_mask, ++ unsigned int query_flags); +--- a/fs/libfs.c ++++ b/fs/libfs.c +@@ -1647,7 +1647,13 @@ struct inode *alloc_anon_inode(struct su + * that it already _is_ on the dirty list. + */ + inode->i_state = I_DIRTY; +- inode->i_mode = S_IRUSR | S_IWUSR; ++ /* ++ * Historically anonymous inodes didn't have a type at all and ++ * userspace has come to rely on this. Internally they're just ++ * regular files but S_IFREG is masked off when reporting ++ * information to userspace. 
++ */ ++ inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); + inode->i_flags |= S_PRIVATE; diff --git a/debian/patches/patchset-pf/fixes/0006-anon_inode-explicitly-block-setattr.patch b/debian/patches/patchset-pf/fixes/0006-anon_inode-explicitly-block-setattr.patch new file mode 100644 index 0000000..9ec70fe --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0006-anon_inode-explicitly-block-setattr.patch @@ -0,0 +1,80 @@ +From ea4199112ae6d8da866417f50e035be01488c502 Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Mon, 7 Apr 2025 11:54:17 +0200 +Subject: anon_inode: explicitly block ->setattr() + +It is currently possible to change the mode and owner of the single +anonymous inode in the kernel: + +int main(int argc, char *argv[]) +{ + int ret, sfd; + sigset_t mask; + struct signalfd_siginfo fdsi; + + sigemptyset(&mask); + sigaddset(&mask, SIGINT); + sigaddset(&mask, SIGQUIT); + + ret = sigprocmask(SIG_BLOCK, &mask, NULL); + if (ret < 0) + _exit(1); + + sfd = signalfd(-1, &mask, 0); + if (sfd < 0) + _exit(2); + + ret = fchown(sfd, 5555, 5555); + if (ret < 0) + _exit(3); + + ret = fchmod(sfd, 0777); + if (ret < 0) + _exit(3); + + _exit(4); +} + +This is a bug. It's not really a meaningful one because anonymous inodes +don't really figure into path lookup and they cannot be reopened via +/proc//fd/ and can't be used for lookup itself. So they can +only ever serve as direct references. + +But it is still completely bogus to allow the mode and ownership or any +of the properties of the anonymous inode to be changed. Block this! + +Link: https://lore.kernel.org/20250407-work-anon_inode-v1-3-53a44c20d44e@kernel.org +Reviewed-by: Jeff Layton +Cc: stable@vger.kernel.org # all LTS kernels +Signed-off-by: Christian Brauner +--- + fs/anon_inodes.c | 7 +++++++ + fs/internal.h | 2 ++ + 2 files changed, 9 insertions(+) + +--- a/fs/anon_inodes.c ++++ b/fs/anon_inodes.c +@@ -57,8 +57,15 @@ int anon_inode_getattr(struct mnt_idmap + return 0; + } + ++int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, ++ struct iattr *attr) ++{ ++ return -EOPNOTSUPP; ++} ++ + static const struct inode_operations anon_inode_operations = { + .getattr = anon_inode_getattr, ++ .setattr = anon_inode_setattr, + }; + + /* +--- a/fs/internal.h ++++ b/fs/internal.h +@@ -346,3 +346,5 @@ int statmount_mnt_idmap(struct mnt_idmap + int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path, + struct kstat *stat, u32 request_mask, + unsigned int query_flags); ++int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, ++ struct iattr *attr); diff --git a/debian/patches/patchset-pf/fixes/0007-anon_inode-raise-SB_I_NODEV-and-SB_I_NOEXEC.patch b/debian/patches/patchset-pf/fixes/0007-anon_inode-raise-SB_I_NODEV-and-SB_I_NOEXEC.patch new file mode 100644 index 0000000..e9139cc --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0007-anon_inode-raise-SB_I_NODEV-and-SB_I_NOEXEC.patch @@ -0,0 +1,39 @@ +From 79f54c5bc7c6097a379c83e9ed56bee27cf1218a Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Mon, 7 Apr 2025 11:54:19 +0200 +Subject: anon_inode: raise SB_I_NODEV and SB_I_NOEXEC + +It isn't possible to execute anonymous inodes because they cannot be +opened in any way after they have been created. This includes execution: + +execveat(fd_anon_inode, "", NULL, NULL, AT_EMPTY_PATH) + +Anonymous inodes have inode->f_op set to no_open_fops which sets +no_open() which returns ENXIO. 
That means any call to do_dentry_open() +which is the endpoint of the do_open_execat() will fail. There's no +chance to execute an anonymous inode. Unless a given subsystem overrides +it ofc. + +However, we should still harden this and raise SB_I_NODEV and +SB_I_NOEXEC on the superblock itself so that no one gets any creative +ideas. + +Link: https://lore.kernel.org/20250407-work-anon_inode-v1-5-53a44c20d44e@kernel.org +Reviewed-by: Jeff Layton +Cc: stable@vger.kernel.org # all LTS kernels +Signed-off-by: Christian Brauner +--- + fs/anon_inodes.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/anon_inodes.c ++++ b/fs/anon_inodes.c +@@ -86,6 +86,8 @@ static int anon_inodefs_init_fs_context( + struct pseudo_fs_context *ctx = init_pseudo(fc, ANON_INODE_FS_MAGIC); + if (!ctx) + return -ENOMEM; ++ fc->s_iflags |= SB_I_NOEXEC; ++ fc->s_iflags |= SB_I_NODEV; + ctx->dops = &anon_inodefs_dentry_operations; + return 0; + } diff --git a/debian/patches/patchset-pf/fixes/0008-fs-add-S_ANON_INODE.patch b/debian/patches/patchset-pf/fixes/0008-fs-add-S_ANON_INODE.patch new file mode 100644 index 0000000..048c3ba --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0008-fs-add-S_ANON_INODE.patch @@ -0,0 +1,136 @@ +From edaacbee0f33b7371ec460723d1042a6c5a4bb9d Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Mon, 21 Apr 2025 10:27:40 +0200 +Subject: fs: add S_ANON_INODE + +This makes it easy to detect proper anonymous inodes and to ensure that +we can detect them in codepaths such as readahead(). + +Readahead on anonymous inodes didn't work because they didn't have a +proper mode. Now that they have we need to retain EINVAL being returned +otherwise LTP will fail. + +We also need to ensure that ioctls aren't simply fired like they are for +regular files so things like inotify inodes continue to correctly call +their own ioctl handlers as in [1]. + +Reported-by: Xilin Wu +Link: https://lore.kernel.org/3A9139D5CD543962+89831381-31b9-4392-87ec-a84a5b3507d8@radxa.com [1] +Link: https://lore.kernel.org/7a1a7076-ff6b-4cb0-94e7-7218a0a44028@sirena.org.uk +Signed-off-by: Christian Brauner +--- + fs/ioctl.c | 7 ++++--- + fs/libfs.c | 2 +- + fs/pidfs.c | 2 +- + include/linux/fs.h | 2 ++ + mm/readahead.c | 20 ++++++++++++++++---- + 5 files changed, 24 insertions(+), 9 deletions(-) + +--- a/fs/ioctl.c ++++ b/fs/ioctl.c +@@ -821,7 +821,8 @@ static int do_vfs_ioctl(struct file *fil + return ioctl_fioasync(fd, filp, argp); + + case FIOQSIZE: +- if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) || ++ if (S_ISDIR(inode->i_mode) || ++ (S_ISREG(inode->i_mode) && !IS_ANON_FILE(inode)) || + S_ISLNK(inode->i_mode)) { + loff_t res = inode_get_bytes(inode); + return copy_to_user(argp, &res, sizeof(res)) ? 
+@@ -856,7 +857,7 @@ static int do_vfs_ioctl(struct file *fil + return ioctl_file_dedupe_range(filp, argp); + + case FIONREAD: +- if (!S_ISREG(inode->i_mode)) ++ if (!S_ISREG(inode->i_mode) || IS_ANON_FILE(inode)) + return vfs_ioctl(filp, cmd, arg); + + return put_user(i_size_read(inode) - filp->f_pos, +@@ -881,7 +882,7 @@ static int do_vfs_ioctl(struct file *fil + return ioctl_get_fs_sysfs_path(filp, argp); + + default: +- if (S_ISREG(inode->i_mode)) ++ if (S_ISREG(inode->i_mode) && !IS_ANON_FILE(inode)) + return file_ioctl(filp, cmd, argp); + break; + } +--- a/fs/libfs.c ++++ b/fs/libfs.c +@@ -1656,7 +1656,7 @@ struct inode *alloc_anon_inode(struct su + inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); +- inode->i_flags |= S_PRIVATE; ++ inode->i_flags |= S_PRIVATE | S_ANON_INODE; + simple_inode_init_ts(inode); + return inode; + } +--- a/fs/pidfs.c ++++ b/fs/pidfs.c +@@ -826,7 +826,7 @@ static int pidfs_init_inode(struct inode + const struct pid *pid = data; + + inode->i_private = data; +- inode->i_flags |= S_PRIVATE; ++ inode->i_flags |= S_PRIVATE | S_ANON_INODE; + inode->i_mode |= S_IRWXU; + inode->i_op = &pidfs_inode_operations; + inode->i_fop = &pidfs_file_operations; +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -2344,6 +2344,7 @@ struct super_operations { + #define S_CASEFOLD (1 << 15) /* Casefolded file */ + #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ + #define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */ ++#define S_ANON_INODE (1 << 19) /* Inode is an anonymous inode */ + + /* + * Note that nosuid etc flags are inode-specific: setting some file-system +@@ -2400,6 +2401,7 @@ static inline bool sb_rdonly(const struc + + #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ + (inode)->i_rdev == WHITEOUT_DEV) ++#define IS_ANON_FILE(inode) ((inode)->i_flags & S_ANON_INODE) + + static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap, + struct inode *inode) +--- a/mm/readahead.c ++++ b/mm/readahead.c +@@ -690,9 +690,15 @@ EXPORT_SYMBOL_GPL(page_cache_async_ra); + + ssize_t ksys_readahead(int fd, loff_t offset, size_t count) + { ++ struct file *file; ++ const struct inode *inode; ++ + CLASS(fd, f)(fd); ++ if (fd_empty(f)) ++ return -EBADF; + +- if (fd_empty(f) || !(fd_file(f)->f_mode & FMODE_READ)) ++ file = fd_file(f); ++ if (!(file->f_mode & FMODE_READ)) + return -EBADF; + + /* +@@ -700,9 +706,15 @@ ssize_t ksys_readahead(int fd, loff_t of + * that can execute readahead. If readahead is not possible + * on this file, then we must return -EINVAL. 
+ */ +- if (!fd_file(f)->f_mapping || !fd_file(f)->f_mapping->a_ops || +- (!S_ISREG(file_inode(fd_file(f))->i_mode) && +- !S_ISBLK(file_inode(fd_file(f))->i_mode))) ++ if (!file->f_mapping) ++ return -EINVAL; ++ if (!file->f_mapping->a_ops) ++ return -EINVAL; ++ ++ inode = file_inode(file); ++ if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) ++ return -EINVAL; ++ if (IS_ANON_FILE(inode)) + return -EINVAL; + + return vfs_fadvise(fd_file(f), offset, count, POSIX_FADV_WILLNEED); diff --git a/debian/patches/patchset-pf/fixes/0009-configfs-Do-not-override-creating-attribute-file-fai.patch b/debian/patches/patchset-pf/fixes/0009-configfs-Do-not-override-creating-attribute-file-fai.patch new file mode 100644 index 0000000..7267733 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0009-configfs-Do-not-override-creating-attribute-file-fai.patch @@ -0,0 +1,35 @@ +From ab287d709809b6dfe4d3c42016a543d976533d51 Mon Sep 17 00:00:00 2001 +From: Zijun Hu +Date: Wed, 7 May 2025 19:50:26 +0800 +Subject: configfs: Do not override creating attribute file failure in + populate_attrs() + +populate_attrs() may override failure for creating attribute files +by success for creating subsequent bin attribute files, and have +wrong return value. + +Fix by creating bin attribute files under successfully creating +attribute files. + +Fixes: 03607ace807b ("configfs: implement binary attributes") +Cc: stable@vger.kernel.org +Reviewed-by: Joel Becker +Reviewed-by: Breno Leitao +Signed-off-by: Zijun Hu +Link: https://lore.kernel.org/r/20250507-fix_configfs-v3-2-fe2d96de8dc4@quicinc.com +Signed-off-by: Andreas Hindborg +--- + fs/configfs/dir.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/configfs/dir.c ++++ b/fs/configfs/dir.c +@@ -619,7 +619,7 @@ static int populate_attrs(struct config_ + break; + } + } +- if (t->ct_bin_attrs) { ++ if (!error && t->ct_bin_attrs) { + for (i = 0; (bin_attr = t->ct_bin_attrs[i]) != NULL; i++) { + if (ops && ops->is_bin_visible && !ops->is_bin_visible(item, bin_attr, i)) + continue; diff --git a/debian/patches/patchset-pf/fixes/0010-Don-t-propagate-mounts-into-detached-trees.patch b/debian/patches/patchset-pf/fixes/0010-Don-t-propagate-mounts-into-detached-trees.patch new file mode 100644 index 0000000..9912527 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0010-Don-t-propagate-mounts-into-detached-trees.patch @@ -0,0 +1,104 @@ +From 896b7b0d6ed53a7fe159c4b76f25407c816aa619 Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Fri, 23 May 2025 19:20:36 -0400 +Subject: Don't propagate mounts into detached trees + +All versions up to 6.14 did not propagate mount events into detached +tree. Shortly after 6.14 a merge of vfs-6.15-rc1.mount.namespace +(130e696aa68b) has changed that. + +Unfortunately, that has caused userland regressions (reported in +https://lore.kernel.org/all/CAOYeF9WQhFDe+BGW=Dp5fK8oRy5AgZ6zokVyTj1Wp4EUiYgt4w@mail.gmail.com/) + +Straight revert wouldn't be an option - in particular, the variant in 6.14 +had a bug that got fixed in d1ddc6f1d9f0 ("fix IS_MNT_PROPAGATING uses") +and we don't want to bring the bug back. + +This is a modification of manual revert posted by Christian, with changes +needed to avoid reintroducing the breakage in scenario described in +d1ddc6f1d9f0. 
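As a rough userspace illustration of the behaviour being restored (not part of the patch; the path, flag values and fallback defines are assumptions), a detached tree is what open_tree(2) with OPEN_TREE_CLONE hands back, and mounts made under the source afterwards are expected to stay out of it:

#define _GNU_SOURCE
#include <sys/syscall.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

#ifndef OPEN_TREE_CLONE
#define OPEN_TREE_CLONE 1          /* from <linux/mount.h> */
#endif
#ifndef AT_RECURSIVE
#define AT_RECURSIVE 0x8000        /* from <linux/fcntl.h> */
#endif

int main(void)
{
	/* Detached, recursive copy of the subtree at /mnt (example path). */
	int fd = syscall(SYS_open_tree, AT_FDCWD, "/mnt",
			 OPEN_TREE_CLONE | AT_RECURSIVE);

	if (fd < 0) {
		perror("open_tree");
		return 1;
	}
	/*
	 * With this fix, mounts created under /mnt from here on are no
	 * longer propagated into the tree referenced by fd; the tree can
	 * still be attached elsewhere later with move_mount(2).
	 */
	return 0;
}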
+ +Cc: stable@vger.kernel.org +Reported-by: Allison Karlitskaya +Tested-by: Allison Karlitskaya +Acked-by: Christian Brauner +Co-developed-by: Christian Brauner +Signed-off-by: Al Viro +--- + fs/mount.h | 5 ----- + fs/namespace.c | 15 ++------------- + fs/pnode.c | 4 ++-- + 3 files changed, 4 insertions(+), 20 deletions(-) + +--- a/fs/mount.h ++++ b/fs/mount.h +@@ -7,10 +7,6 @@ + + extern struct list_head notify_list; + +-typedef __u32 __bitwise mntns_flags_t; +- +-#define MNTNS_PROPAGATING ((__force mntns_flags_t)(1 << 0)) +- + struct mnt_namespace { + struct ns_common ns; + struct mount * root; +@@ -37,7 +33,6 @@ struct mnt_namespace { + struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */ + struct list_head mnt_ns_list; /* entry in the sequential list of mounts namespace */ + refcount_t passive; /* number references not pinning @mounts */ +- mntns_flags_t mntns_flags; + } __randomize_layout; + + struct mnt_pcp { +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -3648,7 +3648,7 @@ static int do_move_mount(struct path *ol + if (!(attached ? check_mnt(old) : is_anon_ns(ns))) + goto out; + +- if (is_anon_ns(ns)) { ++ if (is_anon_ns(ns) && ns == p->mnt_ns) { + /* + * Ending up with two files referring to the root of the + * same anonymous mount namespace would cause an error +@@ -3656,16 +3656,7 @@ static int do_move_mount(struct path *ol + * twice into the mount tree which would be rejected + * later. But be explicit about it right here. + */ +- if ((is_anon_ns(p->mnt_ns) && ns == p->mnt_ns)) +- goto out; +- +- /* +- * If this is an anonymous mount tree ensure that mount +- * propagation can detect mounts that were just +- * propagated to the target mount tree so we don't +- * propagate onto them. +- */ +- ns->mntns_flags |= MNTNS_PROPAGATING; ++ goto out; + } else if (is_anon_ns(p->mnt_ns)) { + /* + * Don't allow moving an attached mount tree to an +@@ -3722,8 +3713,6 @@ static int do_move_mount(struct path *ol + if (attached) + put_mountpoint(old_mp); + out: +- if (is_anon_ns(ns)) +- ns->mntns_flags &= ~MNTNS_PROPAGATING; + unlock_mount(mp); + if (!err) { + if (attached) { +--- a/fs/pnode.c ++++ b/fs/pnode.c +@@ -231,8 +231,8 @@ static int propagate_one(struct mount *m + /* skip if mountpoint isn't visible in m */ + if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) + return 0; +- /* skip if m is in the anon_ns we are emptying */ +- if (m->mnt_ns->mntns_flags & MNTNS_PROPAGATING) ++ /* skip if m is in the anon_ns */ ++ if (is_anon_ns(m->mnt_ns)) + return 0; + + if (peers(m, last_dest)) { diff --git a/debian/patches/patchset-pf/fixes/0011-mm-filemap-gate-dropbehind-invalidate-on-folio-dirty.patch b/debian/patches/patchset-pf/fixes/0011-mm-filemap-gate-dropbehind-invalidate-on-folio-dirty.patch new file mode 100644 index 0000000..ea59300 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0011-mm-filemap-gate-dropbehind-invalidate-on-folio-dirty.patch @@ -0,0 +1,51 @@ +From bc86aaf0e0256220ca787fdbb57a73429ade1129 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Tue, 27 May 2025 07:28:52 -0600 +Subject: mm/filemap: gate dropbehind invalidate on folio !dirty && !writeback + +It's possible for the folio to either get marked for writeback or +redirtied. Add a helper, filemap_end_dropbehind(), which guards the +folio_unmap_invalidate() call behind check for the folio being both +non-dirty and not under writeback AFTER the folio lock has been +acquired. Use this helper folio_end_dropbehind_write(). 
+ +Cc: stable@vger.kernel.org +Reported-by: Al Viro +Fixes: fb7d3bc41493 ("mm/filemap: drop streaming/uncached pages when writeback completes") +Link: https://lore.kernel.org/linux-fsdevel/20250525083209.GS2023217@ZenIV/ +Signed-off-by: Jens Axboe +Link: https://lore.kernel.org/20250527133255.452431-2-axboe@kernel.dk +Signed-off-by: Christian Brauner +--- + mm/filemap.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -1589,6 +1589,16 @@ int folio_wait_private_2_killable(struct + } + EXPORT_SYMBOL(folio_wait_private_2_killable); + ++static void filemap_end_dropbehind(struct folio *folio) ++{ ++ struct address_space *mapping = folio->mapping; ++ ++ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); ++ ++ if (mapping && !folio_test_writeback(folio) && !folio_test_dirty(folio)) ++ folio_unmap_invalidate(mapping, folio, 0); ++} ++ + /* + * If folio was marked as dropbehind, then pages should be dropped when writeback + * completes. Do that now. If we fail, it's likely because of a big folio - +@@ -1604,8 +1614,7 @@ static void folio_end_dropbehind_write(s + * invalidation in that case. + */ + if (in_task() && folio_trylock(folio)) { +- if (folio->mapping) +- folio_unmap_invalidate(folio->mapping, folio, 0); ++ filemap_end_dropbehind(folio); + folio_unlock(folio); + } + } diff --git a/debian/patches/patchset-pf/fixes/0012-mm-filemap-use-filemap_end_dropbehind-for-read-inval.patch b/debian/patches/patchset-pf/fixes/0012-mm-filemap-use-filemap_end_dropbehind-for-read-inval.patch new file mode 100644 index 0000000..2306297 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0012-mm-filemap-use-filemap_end_dropbehind-for-read-inval.patch @@ -0,0 +1,51 @@ +From fad76185ca91983990c660642151083eb05cbfc0 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Tue, 27 May 2025 07:28:53 -0600 +Subject: mm/filemap: use filemap_end_dropbehind() for read invalidation + +Use the filemap_end_dropbehind() helper rather than calling +folio_unmap_invalidate() directly, as we need to check if the folio has +been redirtied or marked for writeback once the folio lock has been +re-acquired. 
+ +Cc: stable@vger.kernel.org +Reported-by: Trond Myklebust +Fixes: 8026e49bff9b ("mm/filemap: add read support for RWF_DONTCACHE") +Link: https://lore.kernel.org/linux-fsdevel/ba8a9805331ce258a622feaca266b163db681a10.camel@hammerspace.com/ +Signed-off-by: Jens Axboe +Link: https://lore.kernel.org/20250527133255.452431-3-axboe@kernel.dk +Signed-off-by: Christian Brauner +--- + mm/filemap.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -2644,8 +2644,7 @@ static inline bool pos_same_folio(loff_t + return (pos1 >> shift == pos2 >> shift); + } + +-static void filemap_end_dropbehind_read(struct address_space *mapping, +- struct folio *folio) ++static void filemap_end_dropbehind_read(struct folio *folio) + { + if (!folio_test_dropbehind(folio)) + return; +@@ -2653,7 +2652,7 @@ static void filemap_end_dropbehind_read( + return; + if (folio_trylock(folio)) { + if (folio_test_clear_dropbehind(folio)) +- folio_unmap_invalidate(mapping, folio, 0); ++ filemap_end_dropbehind(folio); + folio_unlock(folio); + } + } +@@ -2774,7 +2773,7 @@ put_folios: + for (i = 0; i < folio_batch_count(&fbatch); i++) { + struct folio *folio = fbatch.folios[i]; + +- filemap_end_dropbehind_read(mapping, folio); ++ filemap_end_dropbehind_read(folio); + folio_put(folio); + } + folio_batch_init(&fbatch); diff --git a/debian/patches/patchset-pf/fixes/0013-Revert-Disable-FOP_DONTCACHE-for-now-due-to-bugs.patch b/debian/patches/patchset-pf/fixes/0013-Revert-Disable-FOP_DONTCACHE-for-now-due-to-bugs.patch new file mode 100644 index 0000000..3579f44 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0013-Revert-Disable-FOP_DONTCACHE-for-now-due-to-bugs.patch @@ -0,0 +1,29 @@ +From f0579d45f2e03fa3ba0d9466e79a31ea37acb487 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Tue, 27 May 2025 07:28:54 -0600 +Subject: Revert "Disable FOP_DONTCACHE for now due to bugs" + +This reverts commit 478ad02d6844217cc7568619aeb0809d93ade43d. + +Both the read and write side dirty && writeback races should be resolved +now, revert the commit that disabled FOP_DONTCACHE for filesystems. 
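For context, FOP_DONTCACHE is what lets a filesystem accept RWF_DONTCACHE buffered I/O. A minimal caller sketch (assumptions: the RWF_DONTCACHE value from the 6.14 uapi headers and an arbitrary scratch file name):

#define _GNU_SOURCE
#include <sys/uio.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

#ifndef RWF_DONTCACHE
#define RWF_DONTCACHE 0x00000080   /* uncached buffered I/O */
#endif

int main(void)
{
	char buf[4096];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	int fd = open("scratch.bin", O_CREAT | O_WRONLY | O_TRUNC, 0644);

	if (fd < 0)
		return 1;
	memset(buf, 0xaa, sizeof(buf));
	/* Buffered write whose pagecache folios are dropped once writeback completes. */
	if (pwritev2(fd, &iov, 1, 0, RWF_DONTCACHE) < 0)
		return 1;
	close(fd);
	return 0;
}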
+ +Link: https://lore.kernel.org/linux-fsdevel/20250525083209.GS2023217@ZenIV/ +Signed-off-by: Jens Axboe +Link: https://lore.kernel.org/20250527133255.452431-4-axboe@kernel.dk +Signed-off-by: Christian Brauner +--- + include/linux/fs.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -2186,7 +2186,7 @@ struct file_operations { + /* Supports asynchronous lock callbacks */ + #define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6)) + /* File system supports uncached read/write buffered IO */ +-#define FOP_DONTCACHE 0 /* ((__force fop_flags_t)(1 << 7)) */ ++#define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7)) + + /* Wrap a directory iterator that needs exclusive inode access */ + int wrap_directory_iterator(struct file *, struct dir_context *, diff --git a/debian/patches/patchset-pf/fixes/0014-mm-filemap-unify-read-write-dropbehind-naming.patch b/debian/patches/patchset-pf/fixes/0014-mm-filemap-unify-read-write-dropbehind-naming.patch new file mode 100644 index 0000000..2410a52 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0014-mm-filemap-unify-read-write-dropbehind-naming.patch @@ -0,0 +1,36 @@ +From 3b4614564770691cf3a6eb88127268ef6a84180c Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Tue, 27 May 2025 07:28:55 -0600 +Subject: mm/filemap: unify read/write dropbehind naming + +The read side is filemap_end_dropbehind_read(), while the write side +used folio_ as the prefix rather than filemap_. The read side makes more +sense, unify the naming such that the write side follows that. + +Signed-off-by: Jens Axboe +Link: https://lore.kernel.org/20250527133255.452431-5-axboe@kernel.dk +Signed-off-by: Christian Brauner +--- + mm/filemap.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -1604,7 +1604,7 @@ static void filemap_end_dropbehind(struc + * completes. Do that now. If we fail, it's likely because of a big folio - + * just reset dropbehind for that case and latter completions should invalidate. + */ +-static void folio_end_dropbehind_write(struct folio *folio) ++static void filemap_end_dropbehind_write(struct folio *folio) + { + /* + * Hitting !in_task() should not happen off RWF_DONTCACHE writeback, +@@ -1659,7 +1659,7 @@ void folio_end_writeback(struct folio *f + acct_reclaim_writeback(folio); + + if (folio_dropbehind) +- folio_end_dropbehind_write(folio); ++ filemap_end_dropbehind_write(folio); + folio_put(folio); + } + EXPORT_SYMBOL(folio_end_writeback); diff --git a/debian/patches/patchset-pf/fixes/0015-mm-filemap-unify-dropbehind-flag-testing-and-clearin.patch b/debian/patches/patchset-pf/fixes/0015-mm-filemap-unify-dropbehind-flag-testing-and-clearin.patch new file mode 100644 index 0000000..de06ef7 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0015-mm-filemap-unify-dropbehind-flag-testing-and-clearin.patch @@ -0,0 +1,78 @@ +From 6003153e1bc4ad4952773081d7b89aa1ab2274c3 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Tue, 27 May 2025 07:28:56 -0600 +Subject: mm/filemap: unify dropbehind flag testing and clearing + +The read and write side does this a bit differently, unify it such that +the _{read,write} helpers check the bit before locking, and the generic +handler is in charge of clearing the bit and invalidating, once under +the folio lock. 
+ +Signed-off-by: Jens Axboe +Link: https://lore.kernel.org/20250527133255.452431-6-axboe@kernel.dk +Signed-off-by: Christian Brauner +--- + mm/filemap.c | 21 +++++++++++---------- + 1 file changed, 11 insertions(+), 10 deletions(-) + +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -1595,7 +1595,11 @@ static void filemap_end_dropbehind(struc + + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + +- if (mapping && !folio_test_writeback(folio) && !folio_test_dirty(folio)) ++ if (folio_test_writeback(folio) || folio_test_dirty(folio)) ++ return; ++ if (!folio_test_clear_dropbehind(folio)) ++ return; ++ if (mapping) + folio_unmap_invalidate(mapping, folio, 0); + } + +@@ -1606,6 +1610,9 @@ static void filemap_end_dropbehind(struc + */ + static void filemap_end_dropbehind_write(struct folio *folio) + { ++ if (!folio_test_dropbehind(folio)) ++ return; ++ + /* + * Hitting !in_task() should not happen off RWF_DONTCACHE writeback, + * but can happen if normal writeback just happens to find dirty folios +@@ -1629,8 +1636,6 @@ static void filemap_end_dropbehind_write + */ + void folio_end_writeback(struct folio *folio) + { +- bool folio_dropbehind = false; +- + VM_BUG_ON_FOLIO(!folio_test_writeback(folio), folio); + + /* +@@ -1652,14 +1657,11 @@ void folio_end_writeback(struct folio *f + * reused before the folio_wake_bit(). + */ + folio_get(folio); +- if (!folio_test_dirty(folio)) +- folio_dropbehind = folio_test_clear_dropbehind(folio); + if (__folio_end_writeback(folio)) + folio_wake_bit(folio, PG_writeback); +- acct_reclaim_writeback(folio); + +- if (folio_dropbehind) +- filemap_end_dropbehind_write(folio); ++ filemap_end_dropbehind_write(folio); ++ acct_reclaim_writeback(folio); + folio_put(folio); + } + EXPORT_SYMBOL(folio_end_writeback); +@@ -2651,8 +2653,7 @@ static void filemap_end_dropbehind_read( + if (folio_test_writeback(folio) || folio_test_dirty(folio)) + return; + if (folio_trylock(folio)) { +- if (folio_test_clear_dropbehind(folio)) +- filemap_end_dropbehind(folio); ++ filemap_end_dropbehind(folio); + folio_unlock(folio); + } + } diff --git a/debian/patches/patchset-pf/fixes/0016-mm-khugepaged-fix-race-with-folio-split-free-using-t.patch b/debian/patches/patchset-pf/fixes/0016-mm-khugepaged-fix-race-with-folio-split-free-using-t.patch new file mode 100644 index 0000000..f600beb --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0016-mm-khugepaged-fix-race-with-folio-split-free-using-t.patch @@ -0,0 +1,98 @@ +From 61c0b2450f2b85c5053fa4f71d9c619b34d3af6c Mon Sep 17 00:00:00 2001 +From: Shivank Garg +Date: Mon, 26 May 2025 18:28:18 +0000 +Subject: mm/khugepaged: fix race with folio split/free using temporary + reference + +hpage_collapse_scan_file() calls is_refcount_suitable(), which in turn +calls folio_mapcount(). folio_mapcount() checks folio_test_large() before +proceeding to folio_large_mapcount(), but there is a race window where the +folio may get split/freed between these checks, triggering: + + VM_WARN_ON_FOLIO(!folio_test_large(folio), folio) + +Take a temporary reference to the folio in hpage_collapse_scan_file(). +This stabilizes the folio during refcount check and prevents incorrect +large folio detection due to concurrent split/free. Use helper +folio_expected_ref_count() + 1 to compare with folio_ref_count() instead +of using is_refcount_suitable(). 
+ +Link: https://lkml.kernel.org/r/20250526182818.37978-1-shivankg@amd.com +Fixes: 05c5323b2a34 ("mm: track mapcount of large folios in single value") +Signed-off-by: Shivank Garg +Reported-by: syzbot+2b99589e33edbe9475ca@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/all/6828470d.a70a0220.38f255.000c.GAE@google.com +Suggested-by: David Hildenbrand +Acked-by: David Hildenbrand +Acked-by: Dev Jain +Reviewed-by: Baolin Wang +Cc: Bharata B Rao +Cc: Fengwei Yin +Cc: Liam Howlett +Cc: Lorenzo Stoakes +Cc: Mariano Pache +Cc: Ryan Roberts +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +--- + mm/khugepaged.c | 18 +++++++++++++++++- + 1 file changed, 17 insertions(+), 1 deletion(-) + +--- a/mm/khugepaged.c ++++ b/mm/khugepaged.c +@@ -2295,6 +2295,17 @@ static int hpage_collapse_scan_file(stru + continue; + } + ++ if (!folio_try_get(folio)) { ++ xas_reset(&xas); ++ continue; ++ } ++ ++ if (unlikely(folio != xas_reload(&xas))) { ++ folio_put(folio); ++ xas_reset(&xas); ++ continue; ++ } ++ + if (folio_order(folio) == HPAGE_PMD_ORDER && + folio->index == start) { + /* Maybe PMD-mapped */ +@@ -2305,23 +2316,27 @@ static int hpage_collapse_scan_file(stru + * it's safe to skip LRU and refcount checks before + * returning. + */ ++ folio_put(folio); + break; + } + + node = folio_nid(folio); + if (hpage_collapse_scan_abort(node, cc)) { + result = SCAN_SCAN_ABORT; ++ folio_put(folio); + break; + } + cc->node_load[node]++; + + if (!folio_test_lru(folio)) { + result = SCAN_PAGE_LRU; ++ folio_put(folio); + break; + } + +- if (!is_refcount_suitable(folio)) { ++ if (folio_expected_ref_count(folio) + 1 != folio_ref_count(folio)) { + result = SCAN_PAGE_COUNT; ++ folio_put(folio); + break; + } + +@@ -2333,6 +2348,7 @@ static int hpage_collapse_scan_file(stru + */ + + present += folio_nr_pages(folio); ++ folio_put(folio); + + if (need_resched()) { + xas_pause(&xas); diff --git a/debian/patches/patchset-pf/fixes/0017-mm-add-folio_expected_ref_count-for-reference-count-.patch b/debian/patches/patchset-pf/fixes/0017-mm-add-folio_expected_ref_count-for-reference-count-.patch new file mode 100644 index 0000000..eb9fdc1 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0017-mm-add-folio_expected_ref_count-for-reference-count-.patch @@ -0,0 +1,198 @@ +From 214092002cbd9945b7cc6314e76ec42b3f588c01 Mon Sep 17 00:00:00 2001 +From: Shivank Garg +Date: Wed, 30 Apr 2025 10:01:51 +0000 +Subject: mm: add folio_expected_ref_count() for reference count calculation + +Patch series " JFS: Implement migrate_folio for jfs_metapage_aops" v5. + +This patchset addresses a warning that occurs during memory compaction due +to JFS's missing migrate_folio operation. The warning was introduced by +commit 7ee3647243e5 ("migrate: Remove call to ->writepage") which added +explicit warnings when filesystem don't implement migrate_folio. 
+ +The syzbot reported following [1]: + jfs_metapage_aops does not implement migrate_folio + WARNING: CPU: 1 PID: 5861 at mm/migrate.c:955 fallback_migrate_folio mm/migrate.c:953 [inline] + WARNING: CPU: 1 PID: 5861 at mm/migrate.c:955 move_to_new_folio+0x70e/0x840 mm/migrate.c:1007 + Modules linked in: + CPU: 1 UID: 0 PID: 5861 Comm: syz-executor280 Not tainted 6.15.0-rc1-next-20250411-syzkaller #0 PREEMPT(full) + Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 + RIP: 0010:fallback_migrate_folio mm/migrate.c:953 [inline] + RIP: 0010:move_to_new_folio+0x70e/0x840 mm/migrate.c:1007 + +To fix this issue, this series implement metapage_migrate_folio() for JFS +which handles both single and multiple metapages per page configurations. + +While most filesystems leverage existing migration implementations like +filemap_migrate_folio(), buffer_migrate_folio_norefs() or +buffer_migrate_folio() (which internally used folio_expected_refs()), +JFS's metapage architecture requires special handling of its private data +during migration. To support this, this series introduce the +folio_expected_ref_count(), which calculates external references to a +folio from page/swap cache, private data, and page table mappings. + +This standardized implementation replaces the previous ad-hoc +folio_expected_refs() function and enables JFS to accurately determine +whether a folio has unexpected references before attempting migration. + + + + +Implement folio_expected_ref_count() to calculate expected folio reference +counts from: +- Page/swap cache (1 per page) +- Private data (1) +- Page table mappings (1 per map) + +While originally needed for page migration operations, this improved +implementation standardizes reference counting by consolidating all +refcount contributors into a single, reusable function that can benefit +any subsystem needing to detect unexpected references to folios. + +The folio_expected_ref_count() returns the sum of these external +references without including any reference the caller itself might hold. +Callers comparing against the actual folio_ref_count() must account for +their own references separately. + +Link: https://syzkaller.appspot.com/bug?extid=8bb6fd945af4e0ad9299 [1] +Link: https://lkml.kernel.org/r/20250430100150.279751-1-shivankg@amd.com +Link: https://lkml.kernel.org/r/20250430100150.279751-2-shivankg@amd.com +Signed-off-by: David Hildenbrand +Signed-off-by: Shivank Garg +Suggested-by: Matthew Wilcox +Co-developed-by: David Hildenbrand +Cc: Alistair Popple +Cc: Dave Kleikamp +Cc: Donet Tom +Cc: Jane Chu +Cc: Kefeng Wang +Cc: Zi Yan +Signed-off-by: Andrew Morton +--- + include/linux/mm.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++ + mm/migrate.c | 22 ++++--------------- + 2 files changed, 59 insertions(+), 18 deletions(-) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -2307,6 +2307,61 @@ static inline bool folio_maybe_mapped_sh + return folio_test_large_maybe_mapped_shared(folio); + } + ++/** ++ * folio_expected_ref_count - calculate the expected folio refcount ++ * @folio: the folio ++ * ++ * Calculate the expected folio refcount, taking references from the pagecache, ++ * swapcache, PG_private and page table mappings into account. Useful in ++ * combination with folio_ref_count() to detect unexpected references (e.g., ++ * GUP or other temporary references). ++ * ++ * Does currently not consider references from the LRU cache. 
If the folio ++ * was isolated from the LRU (which is the case during migration or split), ++ * the LRU cache does not apply. ++ * ++ * Calling this function on an unmapped folio -- !folio_mapped() -- that is ++ * locked will return a stable result. ++ * ++ * Calling this function on a mapped folio will not result in a stable result, ++ * because nothing stops additional page table mappings from coming (e.g., ++ * fork()) or going (e.g., munmap()). ++ * ++ * Calling this function without the folio lock will also not result in a ++ * stable result: for example, the folio might get dropped from the swapcache ++ * concurrently. ++ * ++ * However, even when called without the folio lock or on a mapped folio, ++ * this function can be used to detect unexpected references early (for example, ++ * if it makes sense to even lock the folio and unmap it). ++ * ++ * The caller must add any reference (e.g., from folio_try_get()) it might be ++ * holding itself to the result. ++ * ++ * Returns the expected folio refcount. ++ */ ++static inline int folio_expected_ref_count(const struct folio *folio) ++{ ++ const int order = folio_order(folio); ++ int ref_count = 0; ++ ++ if (WARN_ON_ONCE(folio_test_slab(folio))) ++ return 0; ++ ++ if (folio_test_anon(folio)) { ++ /* One reference per page from the swapcache. */ ++ ref_count += folio_test_swapcache(folio) << order; ++ } else if (!((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS)) { ++ /* One reference per page from the pagecache. */ ++ ref_count += !!folio->mapping << order; ++ /* One reference from PG_private. */ ++ ref_count += folio_test_private(folio); ++ } ++ ++ /* One reference per page table mapping. */ ++ return ref_count + folio_mapcount(folio); ++} ++ + #ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE + static inline int arch_make_folio_accessible(struct folio *folio) + { +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -445,20 +445,6 @@ unlock: + } + #endif + +-static int folio_expected_refs(struct address_space *mapping, +- struct folio *folio) +-{ +- int refs = 1; +- if (!mapping) +- return refs; +- +- refs += folio_nr_pages(folio); +- if (folio_test_private(folio)) +- refs++; +- +- return refs; +-} +- + /* + * Replace the folio in the mapping. 
+ * +@@ -601,7 +587,7 @@ static int __folio_migrate_mapping(struc + int folio_migrate_mapping(struct address_space *mapping, + struct folio *newfolio, struct folio *folio, int extra_count) + { +- int expected_count = folio_expected_refs(mapping, folio) + extra_count; ++ int expected_count = folio_expected_ref_count(folio) + extra_count + 1; + + if (folio_ref_count(folio) != expected_count) + return -EAGAIN; +@@ -618,7 +604,7 @@ int migrate_huge_page_move_mapping(struc + struct folio *dst, struct folio *src) + { + XA_STATE(xas, &mapping->i_pages, folio_index(src)); +- int rc, expected_count = folio_expected_refs(mapping, src); ++ int rc, expected_count = folio_expected_ref_count(src) + 1; + + if (folio_ref_count(src) != expected_count) + return -EAGAIN; +@@ -749,7 +735,7 @@ static int __migrate_folio(struct addres + struct folio *src, void *src_private, + enum migrate_mode mode) + { +- int rc, expected_count = folio_expected_refs(mapping, src); ++ int rc, expected_count = folio_expected_ref_count(src) + 1; + + /* Check whether src does not have extra refs before we do more work */ + if (folio_ref_count(src) != expected_count) +@@ -837,7 +823,7 @@ static int __buffer_migrate_folio(struct + return migrate_folio(mapping, dst, src, mode); + + /* Check whether page does not have extra refs before we do more work */ +- expected_count = folio_expected_refs(mapping, src); ++ expected_count = folio_expected_ref_count(src) + 1; + if (folio_ref_count(src) != expected_count) + return -EAGAIN; + diff --git a/debian/patches/patchset-pf/fixes/0018-mm-fix-uprobe-pte-be-overwritten-when-expanding-vma.patch b/debian/patches/patchset-pf/fixes/0018-mm-fix-uprobe-pte-be-overwritten-when-expanding-vma.patch new file mode 100644 index 0000000..0783b91 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0018-mm-fix-uprobe-pte-be-overwritten-when-expanding-vma.patch @@ -0,0 +1,129 @@ +From 0f52f05148589fe4115322a9cc8ffab760091a0a Mon Sep 17 00:00:00 2001 +From: Pu Lehui +Date: Thu, 29 May 2025 15:56:47 +0000 +Subject: mm: fix uprobe pte be overwritten when expanding vma + +Patch series "Fix uprobe pte be overwritten when expanding vma". + + +This patch (of 4): + +We encountered a BUG alert triggered by Syzkaller as follows: + BUG: Bad rss-counter state mm:00000000b4a60fca type:MM_ANONPAGES val:1 + +And we can reproduce it with the following steps: +1. register uprobe on file at zero offset +2. mmap the file at zero offset: + addr1 = mmap(NULL, 2 * 4096, PROT_NONE, MAP_PRIVATE, fd, 0); +3. mremap part of vma1 to new vma2: + addr2 = mremap(addr1, 4096, 2 * 4096, MREMAP_MAYMOVE); +4. mremap back to orig addr1: + mremap(addr2, 4096, 4096, MREMAP_MAYMOVE | MREMAP_FIXED, addr1); + +In step 3, the vma1 range [addr1, addr1 + 4096] will be remap to new vma2 +with range [addr2, addr2 + 8192], and remap uprobe anon page from the vma1 +to vma2, then unmap the vma1 range [addr1, addr1 + 4096]. + +In step 4, the vma2 range [addr2, addr2 + 4096] will be remap back to the +addr range [addr1, addr1 + 4096]. Since the addr range [addr1 + 4096, +addr1 + 8192] still maps the file, it will take vma_merge_new_range to +expand the range, and then do uprobe_mmap in vma_complete. Since the +merged vma pgoff is also zero offset, it will install uprobe anon page to +the merged vma. However, the upcomming move_page_tables step, which use +set_pte_at to remap the vma2 uprobe pte to the merged vma, will overwrite +the newly uprobe pte in the merged vma, and lead that pte to be orphan. 
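For reference, the four steps above correspond to roughly the following sequence (a sketch only: the probed file path is a placeholder, the uprobe from step 1 is assumed to be registered beforehand, e.g. through tracefs uprobe_events, and error handling is trimmed):

#define _GNU_SOURCE
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/path/to/probed/file", O_RDONLY);  /* step 1 done elsewhere */
	void *addr1, *addr2;

	/* step 2: map two pages of the file at offset 0 */
	addr1 = mmap(NULL, 2 * 4096, PROT_NONE, MAP_PRIVATE, fd, 0);
	/* step 3: move one page of vma1 into a new, larger vma2 */
	addr2 = mremap(addr1, 4096, 2 * 4096, MREMAP_MAYMOVE);
	/* step 4: move it back over addr1, expanding and merging the original range */
	mremap(addr2, 4096, 4096, MREMAP_MAYMOVE | MREMAP_FIXED, addr1);
	return 0;
}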
+ +Since the uprobe pte will be remapped to the merged vma, we can remove the +unnecessary uprobe_mmap upon merged vma. + +This problem was first found in linux-6.6.y and also exists in the +community syzkaller: +https://lore.kernel.org/all/000000000000ada39605a5e71711@google.com/T/ + +Link: https://lkml.kernel.org/r/20250529155650.4017699-1-pulehui@huaweicloud.com +Link: https://lkml.kernel.org/r/20250529155650.4017699-2-pulehui@huaweicloud.com +Fixes: 2b1444983508 ("uprobes, mm, x86: Add the ability to install and remove uprobes breakpoints") +Signed-off-by: Pu Lehui +Suggested-by: Lorenzo Stoakes +Reviewed-by: Lorenzo Stoakes +Acked-by: David Hildenbrand +Cc: Jann Horn +Cc: Liam Howlett +Cc: "Masami Hiramatsu (Google)" +Cc: Oleg Nesterov +Cc: Peter Zijlstra +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +--- + mm/vma.c | 20 +++++++++++++++++--- + mm/vma.h | 7 +++++++ + 2 files changed, 24 insertions(+), 3 deletions(-) + +--- a/mm/vma.c ++++ b/mm/vma.c +@@ -144,6 +144,9 @@ static void init_multi_vma_prep(struct v + vp->file = vma->vm_file; + if (vp->file) + vp->mapping = vma->vm_file->f_mapping; ++ ++ if (vmg && vmg->skip_vma_uprobe) ++ vp->skip_vma_uprobe = true; + } + + /* +@@ -333,10 +336,13 @@ static void vma_complete(struct vma_prep + + if (vp->file) { + i_mmap_unlock_write(vp->mapping); +- uprobe_mmap(vp->vma); + +- if (vp->adj_next) +- uprobe_mmap(vp->adj_next); ++ if (!vp->skip_vma_uprobe) { ++ uprobe_mmap(vp->vma); ++ ++ if (vp->adj_next) ++ uprobe_mmap(vp->adj_next); ++ } + } + + if (vp->remove) { +@@ -1783,6 +1789,14 @@ struct vm_area_struct *copy_vma(struct v + faulted_in_anon_vma = false; + } + ++ /* ++ * If the VMA we are copying might contain a uprobe PTE, ensure ++ * that we do not establish one upon merge. Otherwise, when mremap() ++ * moves page tables, it will orphan the newly created PTE. ++ */ ++ if (vma->vm_file) ++ vmg.skip_vma_uprobe = true; ++ + new_vma = find_vma_prev(mm, addr, &vmg.prev); + if (new_vma && new_vma->vm_start < addr + len) + return NULL; /* should never get here */ +--- a/mm/vma.h ++++ b/mm/vma.h +@@ -19,6 +19,8 @@ struct vma_prepare { + struct vm_area_struct *insert; + struct vm_area_struct *remove; + struct vm_area_struct *remove2; ++ ++ bool skip_vma_uprobe :1; + }; + + struct unlink_vma_file_batch { +@@ -120,6 +122,11 @@ struct vma_merge_struct { + */ + bool give_up_on_oom :1; + ++ /* ++ * If set, skip uprobe_mmap upon merged vma. ++ */ ++ bool skip_vma_uprobe :1; ++ + /* Internal flags set during merge process: */ + + /* diff --git a/debian/patches/patchset-pf/fixes/0019-mm-hugetlb-unshare-page-tables-during-VMA-split-not-.patch b/debian/patches/patchset-pf/fixes/0019-mm-hugetlb-unshare-page-tables-during-VMA-split-not-.patch new file mode 100644 index 0000000..0ec9477 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0019-mm-hugetlb-unshare-page-tables-during-VMA-split-not-.patch @@ -0,0 +1,217 @@ +From 6f1e03b94f7777323aaefd9286d992a1cbd0adf7 Mon Sep 17 00:00:00 2001 +From: Jann Horn +Date: Tue, 27 May 2025 23:23:53 +0200 +Subject: mm/hugetlb: unshare page tables during VMA split, not before + +Currently, __split_vma() triggers hugetlb page table unsharing through +vm_ops->may_split(). This happens before the VMA lock and rmap locks are +taken - which is too early, it allows racing VMA-locked page faults in our +process and racing rmap walks from other processes to cause page tables to +be shared again before we actually perform the split. 
+ +Fix it by explicitly calling into the hugetlb unshare logic from +__split_vma() in the same place where THP splitting also happens. At that +point, both the VMA and the rmap(s) are write-locked. + +An annoying detail is that we can now call into the helper +hugetlb_unshare_pmds() from two different locking contexts: + +1. from hugetlb_split(), holding: + - mmap lock (exclusively) + - VMA lock + - file rmap lock (exclusively) +2. hugetlb_unshare_all_pmds(), which I think is designed to be able to + call us with only the mmap lock held (in shared mode), but currently + only runs while holding mmap lock (exclusively) and VMA lock + +Backporting note: +This commit fixes a racy protection that was introduced in commit +b30c14cd6102 ("hugetlb: unshare some PMDs when splitting VMAs"); that +commit claimed to fix an issue introduced in 5.13, but it should actually +also go all the way back. + +[jannh@google.com: v2] + Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-1-1329349bad1a@google.com +Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-0-1329349bad1a@google.com +Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-1-f4136f5ec58a@google.com +Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page") +Signed-off-by: Jann Horn +Cc: Liam Howlett +Reviewed-by: Lorenzo Stoakes +Reviewed-by: Oscar Salvador +Cc: Lorenzo Stoakes +Cc: Vlastimil Babka +Cc: [b30c14cd6102: hugetlb: unshare some PMDs when splitting VMAs] +Cc: +Signed-off-by: Andrew Morton +--- + include/linux/hugetlb.h | 3 ++ + mm/hugetlb.c | 60 +++++++++++++++++++++++--------- + mm/vma.c | 7 ++++ + tools/testing/vma/vma_internal.h | 2 ++ + 4 files changed, 56 insertions(+), 16 deletions(-) + +--- a/include/linux/hugetlb.h ++++ b/include/linux/hugetlb.h +@@ -276,6 +276,7 @@ bool is_hugetlb_entry_migration(pte_t pt + bool is_hugetlb_entry_hwpoisoned(pte_t pte); + void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); + void fixup_hugetlb_reservations(struct vm_area_struct *vma); ++void hugetlb_split(struct vm_area_struct *vma, unsigned long addr); + + #else /* !CONFIG_HUGETLB_PAGE */ + +@@ -473,6 +474,8 @@ static inline void fixup_hugetlb_reserva + { + } + ++static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {} ++ + #endif /* !CONFIG_HUGETLB_PAGE */ + + #ifndef pgd_write +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -120,7 +120,7 @@ static void hugetlb_vma_lock_free(struct + static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma); + static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma); + static void hugetlb_unshare_pmds(struct vm_area_struct *vma, +- unsigned long start, unsigned long end); ++ unsigned long start, unsigned long end, bool take_locks); + static struct resv_map *vma_resv_map(struct vm_area_struct *vma); + + static void hugetlb_free_folio(struct folio *folio) +@@ -5426,26 +5426,40 @@ static int hugetlb_vm_op_split(struct vm + { + if (addr & ~(huge_page_mask(hstate_vma(vma)))) + return -EINVAL; ++ return 0; ++} + ++void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) ++{ + /* + * PMD sharing is only possible for PUD_SIZE-aligned address ranges + * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this + * split, unshare PMDs in the PUD_SIZE interval surrounding addr now. ++ * This function is called in the middle of a VMA split operation, with ++ * MM, VMA and rmap all write-locked to prevent concurrent page table ++ * walks (except hardware and gup_fast()). 
+ */ ++ vma_assert_write_locked(vma); ++ i_mmap_assert_write_locked(vma->vm_file->f_mapping); ++ + if (addr & ~PUD_MASK) { +- /* +- * hugetlb_vm_op_split is called right before we attempt to +- * split the VMA. We will need to unshare PMDs in the old and +- * new VMAs, so let's unshare before we split. +- */ + unsigned long floor = addr & PUD_MASK; + unsigned long ceil = floor + PUD_SIZE; + +- if (floor >= vma->vm_start && ceil <= vma->vm_end) +- hugetlb_unshare_pmds(vma, floor, ceil); ++ if (floor >= vma->vm_start && ceil <= vma->vm_end) { ++ /* ++ * Locking: ++ * Use take_locks=false here. ++ * The file rmap lock is already held. ++ * The hugetlb VMA lock can't be taken when we already ++ * hold the file rmap lock, and we don't need it because ++ * its purpose is to synchronize against concurrent page ++ * table walks, which are not possible thanks to the ++ * locks held by our caller. ++ */ ++ hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false); ++ } + } +- +- return 0; + } + + static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma) +@@ -7884,9 +7898,16 @@ void move_hugetlb_state(struct folio *ol + spin_unlock_irq(&hugetlb_lock); + } + ++/* ++ * If @take_locks is false, the caller must ensure that no concurrent page table ++ * access can happen (except for gup_fast() and hardware page walks). ++ * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like ++ * concurrent page fault handling) and the file rmap lock. ++ */ + static void hugetlb_unshare_pmds(struct vm_area_struct *vma, + unsigned long start, +- unsigned long end) ++ unsigned long end, ++ bool take_locks) + { + struct hstate *h = hstate_vma(vma); + unsigned long sz = huge_page_size(h); +@@ -7910,8 +7931,12 @@ static void hugetlb_unshare_pmds(struct + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, + start, end); + mmu_notifier_invalidate_range_start(&range); +- hugetlb_vma_lock_write(vma); +- i_mmap_lock_write(vma->vm_file->f_mapping); ++ if (take_locks) { ++ hugetlb_vma_lock_write(vma); ++ i_mmap_lock_write(vma->vm_file->f_mapping); ++ } else { ++ i_mmap_assert_write_locked(vma->vm_file->f_mapping); ++ } + for (address = start; address < end; address += PUD_SIZE) { + ptep = hugetlb_walk(vma, address, sz); + if (!ptep) +@@ -7921,8 +7946,10 @@ static void hugetlb_unshare_pmds(struct + spin_unlock(ptl); + } + flush_hugetlb_tlb_range(vma, start, end); +- i_mmap_unlock_write(vma->vm_file->f_mapping); +- hugetlb_vma_unlock_write(vma); ++ if (take_locks) { ++ i_mmap_unlock_write(vma->vm_file->f_mapping); ++ hugetlb_vma_unlock_write(vma); ++ } + /* + * No need to call mmu_notifier_arch_invalidate_secondary_tlbs(), see + * Documentation/mm/mmu_notifier.rst. +@@ -7937,7 +7964,8 @@ static void hugetlb_unshare_pmds(struct + void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) + { + hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE), +- ALIGN_DOWN(vma->vm_end, PUD_SIZE)); ++ ALIGN_DOWN(vma->vm_end, PUD_SIZE), ++ /* take_locks = */ true); + } + + /* +--- a/mm/vma.c ++++ b/mm/vma.c +@@ -516,7 +516,14 @@ __split_vma(struct vma_iterator *vmi, st + init_vma_prep(&vp, vma); + vp.insert = new; + vma_prepare(&vp); ++ ++ /* ++ * Get rid of huge pages and shared page tables straddling the split ++ * boundary. 
++ */ + vma_adjust_trans_huge(vma, vma->vm_start, addr, NULL); ++ if (is_vm_hugetlb_page(vma)) ++ hugetlb_split(vma, addr); + + if (new_below) { + vma->vm_start = addr; +--- a/tools/testing/vma/vma_internal.h ++++ b/tools/testing/vma/vma_internal.h +@@ -793,6 +793,8 @@ static inline void vma_adjust_trans_huge + (void)next; + } + ++static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {} ++ + static inline void vma_iter_free(struct vma_iterator *vmi) + { + mas_destroy(&vmi->mas); diff --git a/debian/patches/patchset-pf/fixes/0020-mm-hugetlb-fix-huge_pmd_unshare-vs-GUP-fast-race.patch b/debian/patches/patchset-pf/fixes/0020-mm-hugetlb-fix-huge_pmd_unshare-vs-GUP-fast-race.patch new file mode 100644 index 0000000..11bf0bc --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0020-mm-hugetlb-fix-huge_pmd_unshare-vs-GUP-fast-race.patch @@ -0,0 +1,50 @@ +From cbd0e47470ea4db11acf3612edf91b5047a90d24 Mon Sep 17 00:00:00 2001 +From: Jann Horn +Date: Tue, 27 May 2025 23:23:54 +0200 +Subject: mm/hugetlb: fix huge_pmd_unshare() vs GUP-fast race + +huge_pmd_unshare() drops a reference on a page table that may have +previously been shared across processes, potentially turning it into a +normal page table used in another process in which unrelated VMAs can +afterwards be installed. + +If this happens in the middle of a concurrent gup_fast(), gup_fast() could +end up walking the page tables of another process. While I don't see any +way in which that immediately leads to kernel memory corruption, it is +really weird and unexpected. + +Fix it with an explicit broadcast IPI through tlb_remove_table_sync_one(), +just like we do in khugepaged when removing page tables for a THP +collapse. + +Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-2-1329349bad1a@google.com +Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-2-f4136f5ec58a@google.com +Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page") +Signed-off-by: Jann Horn +Reviewed-by: Lorenzo Stoakes +Cc: Liam Howlett +Cc: Muchun Song +Cc: Oscar Salvador +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +--- + mm/hugetlb.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -7628,6 +7628,13 @@ int huge_pmd_unshare(struct mm_struct *m + return 0; + + pud_clear(pud); ++ /* ++ * Once our caller drops the rmap lock, some other process might be ++ * using this page table as a normal, non-hugetlb page table. ++ * Wait for pending gup_fast() in other threads to finish before letting ++ * that happen. ++ */ ++ tlb_remove_table_sync_one(); + ptdesc_pmd_pts_dec(virt_to_ptdesc(ptep)); + mm_dec_nr_pmds(mm); + return 1; diff --git a/debian/patches/patchset-pf/fixes/0021-mm-madvise-handle-madvise_lock-failure-during-race-u.patch b/debian/patches/patchset-pf/fixes/0021-mm-madvise-handle-madvise_lock-failure-during-race-u.patch new file mode 100644 index 0000000..6f88d08 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0021-mm-madvise-handle-madvise_lock-failure-during-race-u.patch @@ -0,0 +1,48 @@ +From cb42e10062f07934d60ce2a9bc154ea7ac0bab5a Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Mon, 2 Jun 2025 10:49:26 -0700 +Subject: mm/madvise: handle madvise_lock() failure during race unwinding + +When unwinding race on -ERESTARTNOINTR handling of process_madvise(), +madvise_lock() failure is ignored. Check the failure and abort remaining +works in the case. 
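The path being fixed is the iovec loop behind process_madvise(2). A minimal self-targeting caller, shown only to make the affected interface concrete (raw syscalls are used in case the libc wrappers are unavailable; the advice value is just an example):

#define _GNU_SOURCE
#include <sys/mman.h>
#include <sys/uio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	int pidfd = (int)syscall(SYS_pidfd_open, getpid(), 0);
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	struct iovec iov = { .iov_base = p, .iov_len = 4096 };

	if (pidfd < 0 || p == MAP_FAILED)
		return 1;
	/* Each iovec entry is advised in turn; the -ERESTARTNOINTR unwinding happens in that loop. */
	syscall(SYS_process_madvise, pidfd, &iov, 1, MADV_COLD, 0);
	return 0;
}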
+ +Link: https://lkml.kernel.org/r/20250602174926.1074-1-sj@kernel.org +Fixes: 4000e3d0a367 ("mm/madvise: remove redundant mmap_lock operations from process_madvise()") +Signed-off-by: SeongJae Park +Reported-by: Barry Song <21cnbao@gmail.com> +Closes: https://lore.kernel.org/CAGsJ_4xJXXO0G+4BizhohSZ4yDteziPw43_uF8nPXPWxUVChzw@mail.gmail.com +Reviewed-by: Jann Horn +Reviewed-by: Lorenzo Stoakes +Acked-by: David Hildenbrand +Reviewed-by: Shakeel Butt +Reviewed-by: Barry Song +Cc: Liam Howlett +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +--- + mm/madvise.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/mm/madvise.c ++++ b/mm/madvise.c +@@ -1830,7 +1830,9 @@ static ssize_t vector_madvise(struct mm_ + + /* Drop and reacquire lock to unwind race. */ + madvise_unlock(mm, behavior); +- madvise_lock(mm, behavior); ++ ret = madvise_lock(mm, behavior); ++ if (ret) ++ goto out; + continue; + } + if (ret < 0) +@@ -1839,6 +1841,7 @@ static ssize_t vector_madvise(struct mm_ + } + madvise_unlock(mm, behavior); + ++out: + ret = (total_len - iov_iter_count(iter)) ? : ret; + + return ret; diff --git a/debian/patches/patchset-pf/fixes/0022-video-screen_info-Relocate-framebuffers-behind-PCI-b.patch b/debian/patches/patchset-pf/fixes/0022-video-screen_info-Relocate-framebuffers-behind-PCI-b.patch new file mode 100644 index 0000000..6b9ba1b --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0022-video-screen_info-Relocate-framebuffers-behind-PCI-b.patch @@ -0,0 +1,164 @@ +From 0aeb6f83ff11709bb4b6fc9afa2f742681ca36e1 Mon Sep 17 00:00:00 2001 +From: Thomas Zimmermann +Date: Wed, 28 May 2025 10:02:08 +0200 +Subject: video: screen_info: Relocate framebuffers behind PCI bridges + +Apply PCI host-bridge window offsets to screen_info framebuffers. Fixes +invalid access to I/O memory. + +Resources behind a PCI host bridge can be relocated by a certain offset +in the kernel's CPU address range used for I/O. The framebuffer memory +range stored in screen_info refers to the CPU addresses as seen during +boot (where the offset is 0). During boot up, firmware may assign a +different memory offset to the PCI host bridge and thereby relocating +the framebuffer address of the PCI graphics device as seen by the kernel. +The information in screen_info must be updated as well. + +The helper pcibios_bus_to_resource() performs the relocation of the +screen_info's framebuffer resource (given in PCI bus addresses). The +result matches the I/O-memory resource of the PCI graphics device (given +in CPU addresses). As before, we store away the information necessary to +later update the information in screen_info itself. + +Commit 78aa89d1dfba ("firmware/sysfb: Update screen_info for relocated +EFI framebuffers") added the code for updating screen_info. It is based +on similar functionality that pre-existed in efifb. Efifb uses a pointer +to the PCI resource, while the newer code does a memcpy of the region. +Hence efifb sees any updates to the PCI resource and avoids the issue. + +v3: +- Only use struct pci_bus_region for PCI bus addresses (Bjorn) +- Clarify address semantics in commit messages and comments (Bjorn) +v2: +- Fixed tags (Takashi, Ivan) +- Updated information on efifb + +Signed-off-by: Thomas Zimmermann +Reviewed-by: Javier Martinez Canillas +Reported-by: "Ivan T. Ivanov" +Closes: https://bugzilla.suse.com/show_bug.cgi?id=1240696 +Tested-by: "Ivan T. 
Ivanov" +Fixes: 78aa89d1dfba ("firmware/sysfb: Update screen_info for relocated EFI framebuffers") +Cc: dri-devel@lists.freedesktop.org +Cc: # v6.9+ +Link: https://lore.kernel.org/r/20250528080234.7380-1-tzimmermann@suse.de +--- + drivers/video/screen_info_pci.c | 79 +++++++++++++++++++++------------ + 1 file changed, 50 insertions(+), 29 deletions(-) + +--- a/drivers/video/screen_info_pci.c ++++ b/drivers/video/screen_info_pci.c +@@ -7,8 +7,8 @@ + + static struct pci_dev *screen_info_lfb_pdev; + static size_t screen_info_lfb_bar; +-static resource_size_t screen_info_lfb_offset; +-static struct resource screen_info_lfb_res = DEFINE_RES_MEM(0, 0); ++static resource_size_t screen_info_lfb_res_start; // original start of resource ++static resource_size_t screen_info_lfb_offset; // framebuffer offset within resource + + static bool __screen_info_relocation_is_valid(const struct screen_info *si, struct resource *pr) + { +@@ -31,7 +31,7 @@ void screen_info_apply_fixups(void) + if (screen_info_lfb_pdev) { + struct resource *pr = &screen_info_lfb_pdev->resource[screen_info_lfb_bar]; + +- if (pr->start != screen_info_lfb_res.start) { ++ if (pr->start != screen_info_lfb_res_start) { + if (__screen_info_relocation_is_valid(si, pr)) { + /* + * Only update base if we have an actual +@@ -47,46 +47,67 @@ void screen_info_apply_fixups(void) + } + } + ++static int __screen_info_lfb_pci_bus_region(const struct screen_info *si, unsigned int type, ++ struct pci_bus_region *r) ++{ ++ u64 base, size; ++ ++ base = __screen_info_lfb_base(si); ++ if (!base) ++ return -EINVAL; ++ ++ size = __screen_info_lfb_size(si, type); ++ if (!size) ++ return -EINVAL; ++ ++ r->start = base; ++ r->end = base + size - 1; ++ ++ return 0; ++} ++ + static void screen_info_fixup_lfb(struct pci_dev *pdev) + { + unsigned int type; +- struct resource res[SCREEN_INFO_MAX_RESOURCES]; +- size_t i, numres; ++ struct pci_bus_region bus_region; + int ret; ++ struct resource r = { ++ .flags = IORESOURCE_MEM, ++ }; ++ const struct resource *pr; + const struct screen_info *si = &screen_info; + + if (screen_info_lfb_pdev) + return; // already found + + type = screen_info_video_type(si); +- if (type != VIDEO_TYPE_EFI) +- return; // only applies to EFI ++ if (!__screen_info_has_lfb(type)) ++ return; // only applies to EFI; maybe VESA + +- ret = screen_info_resources(si, res, ARRAY_SIZE(res)); ++ ret = __screen_info_lfb_pci_bus_region(si, type, &bus_region); + if (ret < 0) + return; +- numres = ret; + +- for (i = 0; i < numres; ++i) { +- struct resource *r = &res[i]; +- const struct resource *pr; +- +- if (!(r->flags & IORESOURCE_MEM)) +- continue; +- pr = pci_find_resource(pdev, r); +- if (!pr) +- continue; +- +- /* +- * We've found a PCI device with the framebuffer +- * resource. Store away the parameters to track +- * relocation of the framebuffer aperture. +- */ +- screen_info_lfb_pdev = pdev; +- screen_info_lfb_bar = pr - pdev->resource; +- screen_info_lfb_offset = r->start - pr->start; +- memcpy(&screen_info_lfb_res, r, sizeof(screen_info_lfb_res)); +- } ++ /* ++ * Translate the PCI bus address to resource. Account ++ * for an offset if the framebuffer is behind a PCI host ++ * bridge. ++ */ ++ pcibios_bus_to_resource(pdev->bus, &r, &bus_region); ++ ++ pr = pci_find_resource(pdev, &r); ++ if (!pr) ++ return; ++ ++ /* ++ * We've found a PCI device with the framebuffer ++ * resource. Store away the parameters to track ++ * relocation of the framebuffer aperture. 
++ */ ++ screen_info_lfb_pdev = pdev; ++ screen_info_lfb_bar = pr - pdev->resource; ++ screen_info_lfb_offset = r.start - pr->start; ++ screen_info_lfb_res_start = bus_region.start; + } + DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID, PCI_BASE_CLASS_DISPLAY, 16, + screen_info_fixup_lfb); diff --git a/debian/patches/patchset-pf/fixes/0023-sysfb-Fix-screen_info-type-check-for-VGA.patch b/debian/patches/patchset-pf/fixes/0023-sysfb-Fix-screen_info-type-check-for-VGA.patch new file mode 100644 index 0000000..d577131 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0023-sysfb-Fix-screen_info-type-check-for-VGA.patch @@ -0,0 +1,86 @@ +From 06ff725d11ea8713876187973c834fb595cb26f1 Mon Sep 17 00:00:00 2001 +From: Thomas Zimmermann +Date: Tue, 3 Jun 2025 17:48:20 +0200 +Subject: sysfb: Fix screen_info type check for VGA +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Use the helper screen_info_video_type() to get the framebuffer +type from struct screen_info. Handle supported values in sorted +switch statement. + +Reading orig_video_isVGA is unreliable. On most systems it is a +VIDEO_TYPE_ constant. On some systems with VGA it is simply set +to 1 to signal the presence of a VGA output. See vga_probe() for +an example. Retrieving the screen_info type with the helper +screen_info_video_type() detects these cases and returns the +appropriate VIDEO_TYPE_ constant. For VGA, sysfb creates a device +named "vga-framebuffer". + +The sysfb code has been taken from vga16fb, where it likely didn't +work correctly either. With this bugfix applied, vga16fb loads for +compatible vga-framebuffer devices. + +Fixes: 0db5b61e0dc0 ("fbdev/vga16fb: Create EGA/VGA devices in sysfb code") +Cc: Thomas Zimmermann +Cc: Javier Martinez Canillas +Cc: Alex Deucher +Cc: Tzung-Bi Shih +Cc: Helge Deller +Cc: "Uwe Kleine-König" +Cc: Zsolt Kajtar +Cc: # v6.1+ +Signed-off-by: Thomas Zimmermann +Reviewed-by: Tzung-Bi Shih +Reviewed-by: Javier Martinez Canillas +Link: https://lore.kernel.org/r/20250603154838.401882-1-tzimmermann@suse.de +--- + drivers/firmware/sysfb.c | 26 ++++++++++++++++++-------- + 1 file changed, 18 insertions(+), 8 deletions(-) + +--- a/drivers/firmware/sysfb.c ++++ b/drivers/firmware/sysfb.c +@@ -143,6 +143,7 @@ static __init int sysfb_init(void) + { + struct screen_info *si = &screen_info; + struct device *parent; ++ unsigned int type; + struct simplefb_platform_data mode; + const char *name; + bool compatible; +@@ -170,17 +171,26 @@ static __init int sysfb_init(void) + goto put_device; + } + ++ type = screen_info_video_type(si); ++ + /* if the FB is incompatible, create a legacy framebuffer device */ +- if (si->orig_video_isVGA == VIDEO_TYPE_EFI) +- name = "efi-framebuffer"; +- else if (si->orig_video_isVGA == VIDEO_TYPE_VLFB) +- name = "vesa-framebuffer"; +- else if (si->orig_video_isVGA == VIDEO_TYPE_VGAC) +- name = "vga-framebuffer"; +- else if (si->orig_video_isVGA == VIDEO_TYPE_EGAC) ++ switch (type) { ++ case VIDEO_TYPE_EGAC: + name = "ega-framebuffer"; +- else ++ break; ++ case VIDEO_TYPE_VGAC: ++ name = "vga-framebuffer"; ++ break; ++ case VIDEO_TYPE_VLFB: ++ name = "vesa-framebuffer"; ++ break; ++ case VIDEO_TYPE_EFI: ++ name = "efi-framebuffer"; ++ break; ++ default: + name = "platform-framebuffer"; ++ break; ++ } + + pd = platform_device_alloc(name, 0); + if (!pd) { diff --git a/debian/patches/patchset-pf/fixes/0024-x86-iopl-Cure-TIF_IO_BITMAP-inconsistencies.patch 
b/debian/patches/patchset-pf/fixes/0024-x86-iopl-Cure-TIF_IO_BITMAP-inconsistencies.patch new file mode 100644 index 0000000..1d29025 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0024-x86-iopl-Cure-TIF_IO_BITMAP-inconsistencies.patch @@ -0,0 +1,113 @@ +From ba4c83076943b477c90015581cc88e262a7d772f Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Wed, 26 Feb 2025 16:01:57 +0100 +Subject: x86/iopl: Cure TIF_IO_BITMAP inconsistencies + +io_bitmap_exit() is invoked from exit_thread() when a task exists or +when a fork fails. In the latter case the exit_thread() cleans up +resources which were allocated during fork(). + +io_bitmap_exit() invokes task_update_io_bitmap(), which in turn ends up +in tss_update_io_bitmap(). tss_update_io_bitmap() operates on the +current task. If current has TIF_IO_BITMAP set, but no bitmap installed, +tss_update_io_bitmap() crashes with a NULL pointer dereference. + +There are two issues, which lead to that problem: + + 1) io_bitmap_exit() should not invoke task_update_io_bitmap() when + the task, which is cleaned up, is not the current task. That's a + clear indicator for a cleanup after a failed fork(). + + 2) A task should not have TIF_IO_BITMAP set and neither a bitmap + installed nor IOPL emulation level 3 activated. + + This happens when a kernel thread is created in the context of + a user space thread, which has TIF_IO_BITMAP set as the thread + flags are copied and the IO bitmap pointer is cleared. + + Other than in the failed fork() case this has no impact because + kernel threads including IO workers never return to user space and + therefore never invoke tss_update_io_bitmap(). + +Cure this by adding the missing cleanups and checks: + + 1) Prevent io_bitmap_exit() to invoke task_update_io_bitmap() if + the to be cleaned up task is not the current task. + + 2) Clear TIF_IO_BITMAP in copy_thread() unconditionally. For user + space forks it is set later, when the IO bitmap is inherited in + io_bitmap_share(). + +For paranoia sake, add a warning into tss_update_io_bitmap() to catch +the case, when that code is invoked with inconsistent state. + +Fixes: ea5f1cd7ab49 ("x86/ioperm: Remove bitmap if all permissions dropped") +Reported-by: syzbot+e2b1803445d236442e54@syzkaller.appspotmail.com +Signed-off-by: Thomas Gleixner +Signed-off-by: Borislav Petkov (AMD) +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/87wmdceom2.ffs@tglx +--- + arch/x86/kernel/ioport.c | 13 +++++++++---- + arch/x86/kernel/process.c | 6 ++++++ + 2 files changed, 15 insertions(+), 4 deletions(-) + +--- a/arch/x86/kernel/ioport.c ++++ b/arch/x86/kernel/ioport.c +@@ -33,8 +33,9 @@ void io_bitmap_share(struct task_struct + set_tsk_thread_flag(tsk, TIF_IO_BITMAP); + } + +-static void task_update_io_bitmap(struct task_struct *tsk) ++static void task_update_io_bitmap(void) + { ++ struct task_struct *tsk = current; + struct thread_struct *t = &tsk->thread; + + if (t->iopl_emul == 3 || t->io_bitmap) { +@@ -54,7 +55,12 @@ void io_bitmap_exit(struct task_struct * + struct io_bitmap *iobm = tsk->thread.io_bitmap; + + tsk->thread.io_bitmap = NULL; +- task_update_io_bitmap(tsk); ++ /* ++ * Don't touch the TSS when invoked on a failed fork(). TSS ++ * reflects the state of @current and not the state of @tsk. 
++ */ ++ if (tsk == current) ++ task_update_io_bitmap(); + if (iobm && refcount_dec_and_test(&iobm->refcnt)) + kfree(iobm); + } +@@ -192,8 +198,7 @@ SYSCALL_DEFINE1(iopl, unsigned int, leve + } + + t->iopl_emul = level; +- task_update_io_bitmap(current); +- ++ task_update_io_bitmap(); + return 0; + } + +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -181,6 +181,7 @@ int copy_thread(struct task_struct *p, c + frame->ret_addr = (unsigned long) ret_from_fork_asm; + p->thread.sp = (unsigned long) fork_frame; + p->thread.io_bitmap = NULL; ++ clear_tsk_thread_flag(p, TIF_IO_BITMAP); + p->thread.iopl_warn = 0; + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); + +@@ -469,6 +470,11 @@ void native_tss_update_io_bitmap(void) + } else { + struct io_bitmap *iobm = t->io_bitmap; + ++ if (WARN_ON_ONCE(!iobm)) { ++ clear_thread_flag(TIF_IO_BITMAP); ++ native_tss_invalidate_io_bitmap(); ++ } ++ + /* + * Only copy bitmap data when the sequence number differs. The + * update time is accounted to the incoming task. diff --git a/debian/patches/patchset-pf/fixes/0025-watchdog-fix-watchdog-may-detect-false-positive-of-s.patch b/debian/patches/patchset-pf/fixes/0025-watchdog-fix-watchdog-may-detect-false-positive-of-s.patch new file mode 100644 index 0000000..0242410 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0025-watchdog-fix-watchdog-may-detect-false-positive-of-s.patch @@ -0,0 +1,200 @@ +From 7856e6900a09ed537366a5e0c774be8926ee022e Mon Sep 17 00:00:00 2001 +From: Luo Gengkun +Date: Mon, 21 Apr 2025 03:50:21 +0000 +Subject: watchdog: fix watchdog may detect false positive of softlockup + +When updating `watchdog_thresh`, there is a race condition between writing +the new `watchdog_thresh` value and stopping the old watchdog timer. If +the old timer triggers during this window, it may falsely detect a +softlockup due to the old interval and the new `watchdog_thresh` value +being used. The problem can be described as follow: + + # We asuume previous watchdog_thresh is 60, so the watchdog timer is + # coming every 24s. +echo 10 > /proc/sys/kernel/watchdog_thresh (User space) +| ++------>+ update watchdog_thresh (We are in kernel now) + | + | # using old interval and new `watchdog_thresh` + +------>+ watchdog hrtimer (irq context: detect softlockup) + | + | + +-------+ + | + | + + softlockup_stop_all + +To fix this problem, introduce a shadow variable for `watchdog_thresh`. +The update to the actual `watchdog_thresh` is delayed until after the old +timer is stopped, preventing false positives. + +The following testcase may help to understand this problem. + +--------------------------------------------- +echo RT_RUNTIME_SHARE > /sys/kernel/debug/sched/features +echo -1 > /proc/sys/kernel/sched_rt_runtime_us +echo 0 > /sys/kernel/debug/sched/fair_server/cpu3/runtime +echo 60 > /proc/sys/kernel/watchdog_thresh +taskset -c 3 chrt -r 99 /bin/bash -c "while true;do true; done" & +echo 10 > /proc/sys/kernel/watchdog_thresh & +--------------------------------------------- + +The test case above first removes the throttling restrictions for +real-time tasks. It then sets watchdog_thresh to 60 and executes a +real-time task ,a simple while(1) loop, on cpu3. Consequently, the final +command gets blocked because the presence of this real-time thread +prevents kworker:3 from being selected by the scheduler. 
This eventually +triggers a softlockup detection on cpu3 due to watchdog_timer_fn operating +with inconsistent variable - using both the old interval and the updated +watchdog_thresh simultaneously. + +[nysal@linux.ibm.com: fix the SOFTLOCKUP_DETECTOR=n case] + Link: https://lkml.kernel.org/r/20250502111120.282690-1-nysal@linux.ibm.com +Link: https://lkml.kernel.org/r/20250421035021.3507649-1-luogengkun@huaweicloud.com +Signed-off-by: Luo Gengkun +Signed-off-by: Nysal Jan K.A. +Cc: Doug Anderson +Cc: Joel Granados +Cc: Song Liu +Cc: Thomas Gleinxer +Cc: "Nysal Jan K.A." +Cc: Venkat Rao Bagalkote +Cc: +Signed-off-by: Andrew Morton +--- + kernel/watchdog.c | 41 +++++++++++++++++++++++++++-------------- + 1 file changed, 27 insertions(+), 14 deletions(-) + +--- a/kernel/watchdog.c ++++ b/kernel/watchdog.c +@@ -47,6 +47,7 @@ int __read_mostly watchdog_user_enabled + static int __read_mostly watchdog_hardlockup_user_enabled = WATCHDOG_HARDLOCKUP_DEFAULT; + static int __read_mostly watchdog_softlockup_user_enabled = 1; + int __read_mostly watchdog_thresh = 10; ++static int __read_mostly watchdog_thresh_next; + static int __read_mostly watchdog_hardlockup_available; + + struct cpumask watchdog_cpumask __read_mostly; +@@ -870,12 +871,20 @@ int lockup_detector_offline_cpu(unsigned + return 0; + } + +-static void __lockup_detector_reconfigure(void) ++static void __lockup_detector_reconfigure(bool thresh_changed) + { + cpus_read_lock(); + watchdog_hardlockup_stop(); + + softlockup_stop_all(); ++ /* ++ * To prevent watchdog_timer_fn from using the old interval and ++ * the new watchdog_thresh at the same time, which could lead to ++ * false softlockup reports, it is necessary to update the ++ * watchdog_thresh after the softlockup is completed. ++ */ ++ if (thresh_changed) ++ watchdog_thresh = READ_ONCE(watchdog_thresh_next); + set_sample_period(); + lockup_detector_update_enable(); + if (watchdog_enabled && watchdog_thresh) +@@ -888,7 +897,7 @@ static void __lockup_detector_reconfigur + void lockup_detector_reconfigure(void) + { + mutex_lock(&watchdog_mutex); +- __lockup_detector_reconfigure(); ++ __lockup_detector_reconfigure(false); + mutex_unlock(&watchdog_mutex); + } + +@@ -908,27 +917,29 @@ static __init void lockup_detector_setup + return; + + mutex_lock(&watchdog_mutex); +- __lockup_detector_reconfigure(); ++ __lockup_detector_reconfigure(false); + softlockup_initialized = true; + mutex_unlock(&watchdog_mutex); + } + + #else /* CONFIG_SOFTLOCKUP_DETECTOR */ +-static void __lockup_detector_reconfigure(void) ++static void __lockup_detector_reconfigure(bool thresh_changed) + { + cpus_read_lock(); + watchdog_hardlockup_stop(); ++ if (thresh_changed) ++ watchdog_thresh = READ_ONCE(watchdog_thresh_next); + lockup_detector_update_enable(); + watchdog_hardlockup_start(); + cpus_read_unlock(); + } + void lockup_detector_reconfigure(void) + { +- __lockup_detector_reconfigure(); ++ __lockup_detector_reconfigure(false); + } + static inline void lockup_detector_setup(void) + { +- __lockup_detector_reconfigure(); ++ __lockup_detector_reconfigure(false); + } + #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ + +@@ -946,11 +957,11 @@ void lockup_detector_soft_poweroff(void) + #ifdef CONFIG_SYSCTL + + /* Propagate any changes to the watchdog infrastructure */ +-static void proc_watchdog_update(void) ++static void proc_watchdog_update(bool thresh_changed) + { + /* Remove impossible cpus to keep sysctl output clean. 
*/ + cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); +- __lockup_detector_reconfigure(); ++ __lockup_detector_reconfigure(thresh_changed); + } + + /* +@@ -984,7 +995,7 @@ static int proc_watchdog_common(int whic + } else { + err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (!err && old != READ_ONCE(*param)) +- proc_watchdog_update(); ++ proc_watchdog_update(false); + } + mutex_unlock(&watchdog_mutex); + return err; +@@ -1035,11 +1046,13 @@ static int proc_watchdog_thresh(const st + + mutex_lock(&watchdog_mutex); + +- old = READ_ONCE(watchdog_thresh); ++ watchdog_thresh_next = READ_ONCE(watchdog_thresh); ++ ++ old = watchdog_thresh_next; + err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + +- if (!err && write && old != READ_ONCE(watchdog_thresh)) +- proc_watchdog_update(); ++ if (!err && write && old != READ_ONCE(watchdog_thresh_next)) ++ proc_watchdog_update(true); + + mutex_unlock(&watchdog_mutex); + return err; +@@ -1060,7 +1073,7 @@ static int proc_watchdog_cpumask(const s + + err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); + if (!err && write) +- proc_watchdog_update(); ++ proc_watchdog_update(false); + + mutex_unlock(&watchdog_mutex); + return err; +@@ -1080,7 +1093,7 @@ static const struct ctl_table watchdog_s + }, + { + .procname = "watchdog_thresh", +- .data = &watchdog_thresh, ++ .data = &watchdog_thresh_next, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_watchdog_thresh, diff --git a/debian/patches/patchset-pf/fixes/0026-sched-rt-Fix-race-in-push_rt_task.patch b/debian/patches/patchset-pf/fixes/0026-sched-rt-Fix-race-in-push_rt_task.patch new file mode 100644 index 0000000..9a8d90f --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0026-sched-rt-Fix-race-in-push_rt_task.patch @@ -0,0 +1,288 @@ +From 45c6602b7fa2a9dfd05a1f9289504c2437205ce4 Mon Sep 17 00:00:00 2001 +From: Harshit Agarwal +Date: Tue, 25 Feb 2025 18:05:53 +0000 +Subject: sched/rt: Fix race in push_rt_task +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Overview +======== +When a CPU chooses to call push_rt_task and picks a task to push to +another CPU's runqueue then it will call find_lock_lowest_rq method +which would take a double lock on both CPUs' runqueues. If one of the +locks aren't readily available, it may lead to dropping the current +runqueue lock and reacquiring both the locks at once. During this window +it is possible that the task is already migrated and is running on some +other CPU. These cases are already handled. However, if the task is +migrated and has already been executed and another CPU is now trying to +wake it up (ttwu) such that it is queued again on the runqeue +(on_rq is 1) and also if the task was run by the same CPU, then the +current checks will pass even though the task was migrated out and is no +longer in the pushable tasks list. + +Crashes +======= +This bug resulted in quite a few flavors of crashes triggering kernel +panics with various crash signatures such as assert failures, page +faults, null pointer dereferences, and queue corruption errors all +coming from scheduler itself. + +Some of the crashes: +-> kernel BUG at kernel/sched/rt.c:1616! BUG_ON(idx >= MAX_RT_PRIO) + Call Trace: + ? __die_body+0x1a/0x60 + ? die+0x2a/0x50 + ? do_trap+0x85/0x100 + ? pick_next_task_rt+0x6e/0x1d0 + ? do_error_trap+0x64/0xa0 + ? pick_next_task_rt+0x6e/0x1d0 + ? exc_invalid_op+0x4c/0x60 + ? pick_next_task_rt+0x6e/0x1d0 + ? asm_exc_invalid_op+0x12/0x20 + ? 
pick_next_task_rt+0x6e/0x1d0 + __schedule+0x5cb/0x790 + ? update_ts_time_stats+0x55/0x70 + schedule_idle+0x1e/0x40 + do_idle+0x15e/0x200 + cpu_startup_entry+0x19/0x20 + start_secondary+0x117/0x160 + secondary_startup_64_no_verify+0xb0/0xbb + +-> BUG: kernel NULL pointer dereference, address: 00000000000000c0 + Call Trace: + ? __die_body+0x1a/0x60 + ? no_context+0x183/0x350 + ? __warn+0x8a/0xe0 + ? exc_page_fault+0x3d6/0x520 + ? asm_exc_page_fault+0x1e/0x30 + ? pick_next_task_rt+0xb5/0x1d0 + ? pick_next_task_rt+0x8c/0x1d0 + __schedule+0x583/0x7e0 + ? update_ts_time_stats+0x55/0x70 + schedule_idle+0x1e/0x40 + do_idle+0x15e/0x200 + cpu_startup_entry+0x19/0x20 + start_secondary+0x117/0x160 + secondary_startup_64_no_verify+0xb0/0xbb + +-> BUG: unable to handle page fault for address: ffff9464daea5900 + kernel BUG at kernel/sched/rt.c:1861! BUG_ON(rq->cpu != task_cpu(p)) + +-> kernel BUG at kernel/sched/rt.c:1055! BUG_ON(!rq->nr_running) + Call Trace: + ? __die_body+0x1a/0x60 + ? die+0x2a/0x50 + ? do_trap+0x85/0x100 + ? dequeue_top_rt_rq+0xa2/0xb0 + ? do_error_trap+0x64/0xa0 + ? dequeue_top_rt_rq+0xa2/0xb0 + ? exc_invalid_op+0x4c/0x60 + ? dequeue_top_rt_rq+0xa2/0xb0 + ? asm_exc_invalid_op+0x12/0x20 + ? dequeue_top_rt_rq+0xa2/0xb0 + dequeue_rt_entity+0x1f/0x70 + dequeue_task_rt+0x2d/0x70 + __schedule+0x1a8/0x7e0 + ? blk_finish_plug+0x25/0x40 + schedule+0x3c/0xb0 + futex_wait_queue_me+0xb6/0x120 + futex_wait+0xd9/0x240 + do_futex+0x344/0xa90 + ? get_mm_exe_file+0x30/0x60 + ? audit_exe_compare+0x58/0x70 + ? audit_filter_rules.constprop.26+0x65e/0x1220 + __x64_sys_futex+0x148/0x1f0 + do_syscall_64+0x30/0x80 + entry_SYSCALL_64_after_hwframe+0x62/0xc7 + +-> BUG: unable to handle page fault for address: ffff8cf3608bc2c0 + Call Trace: + ? __die_body+0x1a/0x60 + ? no_context+0x183/0x350 + ? spurious_kernel_fault+0x171/0x1c0 + ? exc_page_fault+0x3b6/0x520 + ? plist_check_list+0x15/0x40 + ? plist_check_list+0x2e/0x40 + ? asm_exc_page_fault+0x1e/0x30 + ? _cond_resched+0x15/0x30 + ? futex_wait_queue_me+0xc8/0x120 + ? futex_wait+0xd9/0x240 + ? try_to_wake_up+0x1b8/0x490 + ? futex_wake+0x78/0x160 + ? do_futex+0xcd/0xa90 + ? plist_check_list+0x15/0x40 + ? plist_check_list+0x2e/0x40 + ? plist_del+0x6a/0xd0 + ? plist_check_list+0x15/0x40 + ? plist_check_list+0x2e/0x40 + ? dequeue_pushable_task+0x20/0x70 + ? __schedule+0x382/0x7e0 + ? asm_sysvec_reschedule_ipi+0xa/0x20 + ? schedule+0x3c/0xb0 + ? exit_to_user_mode_prepare+0x9e/0x150 + ? irqentry_exit_to_user_mode+0x5/0x30 + ? asm_sysvec_reschedule_ipi+0x12/0x20 + +Above are some of the common examples of the crashes that were observed +due to this issue. + +Details +======= +Let's look at the following scenario to understand this race. + +1) CPU A enters push_rt_task + a) CPU A has chosen next_task = task p. + b) CPU A calls find_lock_lowest_rq(Task p, CPU Z’s rq). + c) CPU A identifies CPU X as a destination CPU (X < Z). + d) CPU A enters double_lock_balance(CPU Z’s rq, CPU X’s rq). + e) Since X is lower than Z, CPU A unlocks CPU Z’s rq. Someone else has + locked CPU X’s rq, and thus, CPU A must wait. + +2) At CPU Z + a) Previous task has completed execution and thus, CPU Z enters + schedule, locks its own rq after CPU A releases it. + b) CPU Z dequeues previous task and begins executing task p. + c) CPU Z unlocks its rq. + d) Task p yields the CPU (ex. by doing IO or waiting to acquire a + lock) which triggers the schedule function on CPU Z. + e) CPU Z enters schedule again, locks its own rq, and dequeues task p. 
+ f) As part of dequeue, it sets p.on_rq = 0 and unlocks its rq. + +3) At CPU B + a) CPU B enters try_to_wake_up with input task p. + b) Since CPU Z dequeued task p, p.on_rq = 0, and CPU B updates + B.state = WAKING. + c) CPU B via select_task_rq determines CPU Y as the target CPU. + +4) The race + a) CPU A acquires CPU X’s lock and relocks CPU Z. + b) CPU A reads task p.cpu = Z and incorrectly concludes task p is + still on CPU Z. + c) CPU A failed to notice task p had been dequeued from CPU Z while + CPU A was waiting for locks in double_lock_balance. If CPU A knew + that task p had been dequeued, it would return NULL forcing + push_rt_task to give up the task p's migration. + d) CPU B updates task p.cpu = Y and calls ttwu_queue. + e) CPU B locks Ys rq. CPU B enqueues task p onto Y and sets task + p.on_rq = 1. + f) CPU B unlocks CPU Y, triggering memory synchronization. + g) CPU A reads task p.on_rq = 1, cementing its assumption that task p + has not migrated. + h) CPU A decides to migrate p to CPU X. + +This leads to A dequeuing p from Y's queue and various crashes down the +line. + +Solution +======== +The solution here is fairly simple. After obtaining the lock (at 4a), +the check is enhanced to make sure that the task is still at the head of +the pushable tasks list. If not, then it is anyway not suitable for +being pushed out. + +Testing +======= +The fix is tested on a cluster of 3 nodes, where the panics due to this +are hit every couple of days. A fix similar to this was deployed on such +cluster and was stable for more than 30 days. + +Co-developed-by: Jon Kohler +Signed-off-by: Jon Kohler +Co-developed-by: Gauri Patwardhan +Signed-off-by: Gauri Patwardhan +Co-developed-by: Rahul Chunduru +Signed-off-by: Rahul Chunduru +Signed-off-by: Harshit Agarwal +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: "Steven Rostedt (Google)" +Reviewed-by: Phil Auld +Tested-by: Will Ton +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20250225180553.167995-1-harshit@nutanix.com +--- + kernel/sched/rt.c | 54 +++++++++++++++++++++++------------------------ + 1 file changed, 26 insertions(+), 28 deletions(-) + +--- a/kernel/sched/rt.c ++++ b/kernel/sched/rt.c +@@ -1883,6 +1883,27 @@ static int find_lowest_rq(struct task_st + return -1; + } + ++static struct task_struct *pick_next_pushable_task(struct rq *rq) ++{ ++ struct task_struct *p; ++ ++ if (!has_pushable_tasks(rq)) ++ return NULL; ++ ++ p = plist_first_entry(&rq->rt.pushable_tasks, ++ struct task_struct, pushable_tasks); ++ ++ BUG_ON(rq->cpu != task_cpu(p)); ++ BUG_ON(task_current(rq, p)); ++ BUG_ON(task_current_donor(rq, p)); ++ BUG_ON(p->nr_cpus_allowed <= 1); ++ ++ BUG_ON(!task_on_rq_queued(p)); ++ BUG_ON(!rt_task(p)); ++ ++ return p; ++} ++ + /* Will lock the rq it finds */ + static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) + { +@@ -1913,18 +1934,16 @@ static struct rq *find_lock_lowest_rq(st + /* + * We had to unlock the run queue. In + * the mean time, task could have +- * migrated already or had its affinity changed. +- * Also make sure that it wasn't scheduled on its rq. ++ * migrated already or had its affinity changed, ++ * therefore check if the task is still at the ++ * head of the pushable tasks list. + * It is possible the task was scheduled, set + * "migrate_disabled" and then got preempted, so we must + * check the task migration disable flag here too. 
+ */ +- if (unlikely(task_rq(task) != rq || ++ if (unlikely(is_migration_disabled(task) || + !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) || +- task_on_cpu(rq, task) || +- !rt_task(task) || +- is_migration_disabled(task) || +- !task_on_rq_queued(task))) { ++ task != pick_next_pushable_task(rq))) { + + double_unlock_balance(rq, lowest_rq); + lowest_rq = NULL; +@@ -1944,27 +1963,6 @@ static struct rq *find_lock_lowest_rq(st + return lowest_rq; + } + +-static struct task_struct *pick_next_pushable_task(struct rq *rq) +-{ +- struct task_struct *p; +- +- if (!has_pushable_tasks(rq)) +- return NULL; +- +- p = plist_first_entry(&rq->rt.pushable_tasks, +- struct task_struct, pushable_tasks); +- +- BUG_ON(rq->cpu != task_cpu(p)); +- BUG_ON(task_current(rq, p)); +- BUG_ON(task_current_donor(rq, p)); +- BUG_ON(p->nr_cpus_allowed <= 1); +- +- BUG_ON(!task_on_rq_queued(p)); +- BUG_ON(!rt_task(p)); +- +- return p; +-} +- + /* + * If the current CPU has more than one RT task, see if the non + * running task can migrate over to a CPU that is running a task diff --git a/debian/patches/patchset-pf/fixes/0027-sched-fair-Adhere-to-place_entity-constraints.patch b/debian/patches/patchset-pf/fixes/0027-sched-fair-Adhere-to-place_entity-constraints.patch new file mode 100644 index 0000000..62d3e4d --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0027-sched-fair-Adhere-to-place_entity-constraints.patch @@ -0,0 +1,62 @@ +From 14b4658d3fa78b169f36e62e722a076a7c50afd8 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Tue, 28 Jan 2025 15:39:49 +0100 +Subject: sched/fair: Adhere to place_entity() constraints + +Mike reports that commit 6d71a9c61604 ("sched/fair: Fix EEVDF entity +placement bug causing scheduling lag") relies on commit 4423af84b297 +("sched/fair: optimize the PLACE_LAG when se->vlag is zero") to not +trip a WARN in place_entity(). + +What happens is that the lag of the very last entity is 0 per +definition -- the average of one element matches the value of that +element. Therefore place_entity() will match the condition skipping +the lag adjustment: + + if (sched_feat(PLACE_LAG) && cfs_rq->nr_queued && se->vlag) { + +Without the 'se->vlag' condition -- it will attempt to adjust the zero +lag even though we're inserting into an empty tree. + +Notably, we should have failed the 'cfs_rq->nr_queued' condition, but +don't because they didn't get updated. + +Additionally, move update_load_add() after placement() as is +consistent with other place_entity() users -- this change is +non-functional, place_entity() does not use cfs_rq->load. 
+ +Fixes: 6d71a9c61604 ("sched/fair: Fix EEVDF entity placement bug causing scheduling lag") +Signed-off-by: Peter Zijlstra (Intel) +Reported-by: Mike Galbraith +Signed-off-by: "Peter Zijlstra (Intel)" +Signed-off-by: Mike Galbraith +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/c216eb4ef0e0e0029c600aefc69d56681cee5581.camel@gmx.de +--- + kernel/sched/fair.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -3808,6 +3808,7 @@ static void reweight_entity(struct cfs_r + update_entity_lag(cfs_rq, se); + se->deadline -= se->vruntime; + se->rel_deadline = 1; ++ cfs_rq->nr_queued--; + if (!curr) + __dequeue_entity(cfs_rq, se); + update_load_sub(&cfs_rq->load, se->load.weight); +@@ -3834,10 +3835,11 @@ static void reweight_entity(struct cfs_r + + enqueue_load_avg(cfs_rq, se); + if (se->on_rq) { +- update_load_add(&cfs_rq->load, se->load.weight); + place_entity(cfs_rq, se, 0); ++ update_load_add(&cfs_rq->load, se->load.weight); + if (!curr) + __enqueue_entity(cfs_rq, se); ++ cfs_rq->nr_queued++; + + /* + * The entity's vruntime has been adjusted, so let's check diff --git a/debian/patches/patchset-pf/fixes/0028-alloc_tag-handle-module-codetag-load-errors-as-modul.patch b/debian/patches/patchset-pf/fixes/0028-alloc_tag-handle-module-codetag-load-errors-as-modul.patch new file mode 100644 index 0000000..462a882 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0028-alloc_tag-handle-module-codetag-load-errors-as-modul.patch @@ -0,0 +1,184 @@ +From 65419a1e04de111460c4f38c47f1db39e71c3357 Mon Sep 17 00:00:00 2001 +From: Suren Baghdasaryan +Date: Wed, 21 May 2025 09:06:02 -0700 +Subject: alloc_tag: handle module codetag load errors as module load failures + +Failures inside codetag_load_module() are currently ignored. As a result +an error there would not cause a module load failure and freeing of the +associated resources. Correct this behavior by propagating the error code +to the caller and handling possible errors. With this change, error to +allocate percpu counters, which happens at this stage, will not be ignored +and will cause a module load failure and freeing of resources. With this +change we also do not need to disable memory allocation profiling when +this error happens, instead we fail to load the module. 
+ +Link: https://lkml.kernel.org/r/20250521160602.1940771-1-surenb@google.com +Fixes: 10075262888b ("alloc_tag: allocate percpu counters for module tags dynamically") +Signed-off-by: Suren Baghdasaryan +Reported-by: Casey Chen +Closes: https://lore.kernel.org/all/20250520231620.15259-1-cachen@purestorage.com/ +Cc: Daniel Gomez +Cc: David Wang <00107082@163.com> +Cc: Kent Overstreet +Cc: Luis Chamberalin +Cc: Petr Pavlu +Cc: Sami Tolvanen +Cc: +Signed-off-by: Andrew Morton +--- + include/linux/codetag.h | 8 ++++---- + kernel/module/main.c | 5 +++-- + lib/alloc_tag.c | 12 +++++++----- + lib/codetag.c | 34 +++++++++++++++++++++++++--------- + 4 files changed, 39 insertions(+), 20 deletions(-) + +--- a/include/linux/codetag.h ++++ b/include/linux/codetag.h +@@ -36,8 +36,8 @@ union codetag_ref { + struct codetag_type_desc { + const char *section; + size_t tag_size; +- void (*module_load)(struct module *mod, +- struct codetag *start, struct codetag *end); ++ int (*module_load)(struct module *mod, ++ struct codetag *start, struct codetag *end); + void (*module_unload)(struct module *mod, + struct codetag *start, struct codetag *end); + #ifdef CONFIG_MODULES +@@ -89,7 +89,7 @@ void *codetag_alloc_module_section(struc + unsigned long align); + void codetag_free_module_sections(struct module *mod); + void codetag_module_replaced(struct module *mod, struct module *new_mod); +-void codetag_load_module(struct module *mod); ++int codetag_load_module(struct module *mod); + void codetag_unload_module(struct module *mod); + + #else /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ +@@ -103,7 +103,7 @@ codetag_alloc_module_section(struct modu + unsigned long align) { return NULL; } + static inline void codetag_free_module_sections(struct module *mod) {} + static inline void codetag_module_replaced(struct module *mod, struct module *new_mod) {} +-static inline void codetag_load_module(struct module *mod) {} ++static inline int codetag_load_module(struct module *mod) { return 0; } + static inline void codetag_unload_module(struct module *mod) {} + + #endif /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ +--- a/kernel/module/main.c ++++ b/kernel/module/main.c +@@ -3399,11 +3399,12 @@ static int load_module(struct load_info + goto sysfs_cleanup; + } + ++ if (codetag_load_module(mod)) ++ goto sysfs_cleanup; ++ + /* Get rid of temporary copy. */ + free_copy(info, flags); + +- codetag_load_module(mod); +- + /* Done! */ + trace_module_load(mod); + +--- a/lib/alloc_tag.c ++++ b/lib/alloc_tag.c +@@ -618,15 +618,16 @@ out: + mas_unlock(&mas); + } + +-static void load_module(struct module *mod, struct codetag *start, struct codetag *stop) ++static int load_module(struct module *mod, struct codetag *start, struct codetag *stop) + { + /* Allocate module alloc_tag percpu counters */ + struct alloc_tag *start_tag; + struct alloc_tag *stop_tag; + struct alloc_tag *tag; + ++ /* percpu counters for core allocations are already statically allocated */ + if (!mod) +- return; ++ return 0; + + start_tag = ct_to_alloc_tag(start); + stop_tag = ct_to_alloc_tag(stop); +@@ -638,12 +639,13 @@ static void load_module(struct module *m + free_percpu(tag->counters); + tag->counters = NULL; + } +- shutdown_mem_profiling(true); +- pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s. 
Memory allocation profiling is disabled!\n", ++ pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s\n", + mod->name); +- break; ++ return -ENOMEM; + } + } ++ ++ return 0; + } + + static void replace_module(struct module *mod, struct module *new_mod) +--- a/lib/codetag.c ++++ b/lib/codetag.c +@@ -167,6 +167,7 @@ static int codetag_module_init(struct co + { + struct codetag_range range; + struct codetag_module *cmod; ++ int mod_id; + int err; + + range = get_section_range(mod, cttype->desc.section); +@@ -190,11 +191,20 @@ static int codetag_module_init(struct co + cmod->range = range; + + down_write(&cttype->mod_lock); +- err = idr_alloc(&cttype->mod_idr, cmod, 0, 0, GFP_KERNEL); +- if (err >= 0) { +- cttype->count += range_size(cttype, &range); +- if (cttype->desc.module_load) +- cttype->desc.module_load(mod, range.start, range.stop); ++ mod_id = idr_alloc(&cttype->mod_idr, cmod, 0, 0, GFP_KERNEL); ++ if (mod_id >= 0) { ++ if (cttype->desc.module_load) { ++ err = cttype->desc.module_load(mod, range.start, range.stop); ++ if (!err) ++ cttype->count += range_size(cttype, &range); ++ else ++ idr_remove(&cttype->mod_idr, mod_id); ++ } else { ++ cttype->count += range_size(cttype, &range); ++ err = 0; ++ } ++ } else { ++ err = mod_id; + } + up_write(&cttype->mod_lock); + +@@ -295,17 +305,23 @@ void codetag_module_replaced(struct modu + mutex_unlock(&codetag_lock); + } + +-void codetag_load_module(struct module *mod) ++int codetag_load_module(struct module *mod) + { + struct codetag_type *cttype; ++ int ret = 0; + + if (!mod) +- return; ++ return 0; + + mutex_lock(&codetag_lock); +- list_for_each_entry(cttype, &codetag_types, link) +- codetag_module_init(cttype, mod); ++ list_for_each_entry(cttype, &codetag_types, link) { ++ ret = codetag_module_init(cttype, mod); ++ if (ret) ++ break; ++ } + mutex_unlock(&codetag_lock); ++ ++ return ret; + } + + void codetag_unload_module(struct module *mod) diff --git a/debian/patches/patchset-pf/fixes/0029-svcrdma-Unregister-the-device-if-svc_rdma_accept-fai.patch b/debian/patches/patchset-pf/fixes/0029-svcrdma-Unregister-the-device-if-svc_rdma_accept-fai.patch new file mode 100644 index 0000000..7d8cb5d --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0029-svcrdma-Unregister-the-device-if-svc_rdma_accept-fai.patch @@ -0,0 +1,29 @@ +From 3848ddd6068c425b732da6e8c78b047ed28c6114 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Sun, 27 Apr 2025 12:39:59 -0400 +Subject: svcrdma: Unregister the device if svc_rdma_accept() fails + +To handle device removal, svc_rdma_accept() requests removal +notification for the underlying device when accepting a connection. +However svc_rdma_free() is not invoked if svc_rdma_accept() fails. +There needs to be a matching "unregister" in that case; otherwise +the device cannot be removed. 
+ +Fixes: c4de97f7c454 ("svcrdma: Handle device removal outside of the CM event handler") +Cc: stable@vger.kernel.org +Reviewed-by: Zhu Yanjun +Signed-off-by: Chuck Lever +--- + net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c ++++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c +@@ -575,6 +575,7 @@ static struct svc_xprt *svc_rdma_accept( + if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) + ib_destroy_qp(newxprt->sc_qp); + rdma_destroy_id(newxprt->sc_cm_id); ++ rpcrdma_rn_unregister(dev, &newxprt->sc_rn); + /* This call to put will destroy the transport */ + svc_xprt_put(&newxprt->sc_xprt); + return NULL; diff --git a/debian/patches/patchset-pf/fixes/0030-SUNRPC-Prevent-hang-on-NFS-mount-with-xprtsec-m-tls.patch b/debian/patches/patchset-pf/fixes/0030-SUNRPC-Prevent-hang-on-NFS-mount-with-xprtsec-m-tls.patch new file mode 100644 index 0000000..089a7bf --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0030-SUNRPC-Prevent-hang-on-NFS-mount-with-xprtsec-m-tls.patch @@ -0,0 +1,53 @@ +From 38b409dd5c2fd9496fde05db4fb538a7e3593922 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 21 May 2025 16:34:13 -0400 +Subject: SUNRPC: Prevent hang on NFS mount with xprtsec=[m]tls + +Engineers at Hammerspace noticed that sometimes mounting with +"xprtsec=tls" hangs for a minute or so, and then times out, even +when the NFS server is reachable and responsive. + +kTLS shuts off data_ready callbacks if strp->msg_ready is set to +mitigate data_ready callbacks when a full TLS record is not yet +ready to be read from the socket. + +Normally msg_ready is clear when the first TLS record arrives on +a socket. However, I observed that sometimes tls_setsockopt() sets +strp->msg_ready, and that prevents forward progress because +tls_data_ready() becomes a no-op. + +Moreover, Jakub says: "If there's a full record queued at the time +when [tlshd] passes the socket back to the kernel, it's up to the +reader to read the already queued data out." So SunRPC cannot +expect a data_ready call when ingress data is already waiting. + +Add an explicit poll after SunRPC's upper transport is set up to +pick up any data that arrived after the TLS handshake but before +transport set-up is complete. + +Reported-by: Steve Sears +Suggested-by: Jakub Kacinski +Fixes: 75eb6af7acdf ("SUNRPC: Add a TCP-with-TLS RPC transport class") +Tested-by: Mike Snitzer +Reviewed-by: Mike Snitzer +Cc: stable@vger.kernel.org +Signed-off-by: Chuck Lever +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtsock.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -2740,6 +2740,11 @@ static void xs_tcp_tls_setup_socket(stru + } + rpc_shutdown_client(lower_clnt); + ++ /* Check for ingress data that arrived before the socket's ++ * ->data_ready callback was set up. 
++ */ ++ xs_poll_check_readable(upper_transport); ++ + out_unlock: + current_restore_flags(pflags, PF_MEMALLOC); + upper_transport->clnt = NULL; diff --git a/debian/patches/patchset-pf/fixes/0031-hv_netvsc-fix-potential-deadlock-in-netvsc_vf_setxdp.patch b/debian/patches/patchset-pf/fixes/0031-hv_netvsc-fix-potential-deadlock-in-netvsc_vf_setxdp.patch new file mode 100644 index 0000000..e0af3fe --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0031-hv_netvsc-fix-potential-deadlock-in-netvsc_vf_setxdp.patch @@ -0,0 +1,89 @@ +From c3e0e5bd29d97f8e5663026e8c2f25e08f1c4544 Mon Sep 17 00:00:00 2001 +From: Saurabh Sengar +Date: Thu, 29 May 2025 03:18:30 -0700 +Subject: hv_netvsc: fix potential deadlock in netvsc_vf_setxdp() + +The MANA driver's probe registers netdevice via the following call chain: + +mana_probe() + register_netdev() + register_netdevice() + +register_netdevice() calls notifier callback for netvsc driver, +holding the netdev mutex via netdev_lock_ops(). + +Further this netvsc notifier callback end up attempting to acquire the +same lock again in dev_xdp_propagate() leading to deadlock. + +netvsc_netdev_event() + netvsc_vf_setxdp() + dev_xdp_propagate() + +This deadlock was not observed so far because net_shaper_ops was never set, +and thus the lock was effectively a no-op in this case. Fix this by using +netif_xdp_propagate() instead of dev_xdp_propagate() to avoid recursive +locking in this path. + +And, since no deadlock is observed on the other path which is via +netvsc_probe, add the lock exclusivly for that path. + +Also, clean up the unregistration path by removing the unnecessary call to +netvsc_vf_setxdp(), since unregister_netdevice_many_notify() already +performs this cleanup via dev_xdp_uninstall(). + +Fixes: 97246d6d21c2 ("net: hold netdev instance lock during ndo_bpf") +Cc: stable@vger.kernel.org +Signed-off-by: Saurabh Sengar +Tested-by: Erni Sri Satya Vennela +Reviewed-by: Haiyang Zhang +Reviewed-by: Subbaraya Sundeep +Link: https://patch.msgid.link/1748513910-23963-1-git-send-email-ssengar@linux.microsoft.com +Signed-off-by: Jakub Kicinski +--- + drivers/net/hyperv/netvsc_bpf.c | 2 +- + drivers/net/hyperv/netvsc_drv.c | 4 ++-- + net/core/dev.c | 1 + + 3 files changed, 4 insertions(+), 3 deletions(-) + +--- a/drivers/net/hyperv/netvsc_bpf.c ++++ b/drivers/net/hyperv/netvsc_bpf.c +@@ -183,7 +183,7 @@ int netvsc_vf_setxdp(struct net_device * + xdp.command = XDP_SETUP_PROG; + xdp.prog = prog; + +- ret = dev_xdp_propagate(vf_netdev, &xdp); ++ ret = netif_xdp_propagate(vf_netdev, &xdp); + + if (ret && prog) + bpf_prog_put(prog); +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -2462,8 +2462,6 @@ static int netvsc_unregister_vf(struct n + + netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); + +- netvsc_vf_setxdp(vf_netdev, NULL); +- + reinit_completion(&net_device_ctx->vf_add); + netdev_rx_handler_unregister(vf_netdev); + netdev_upper_dev_unlink(vf_netdev, ndev); +@@ -2631,7 +2629,9 @@ static int netvsc_probe(struct hv_device + continue; + + netvsc_prepare_bonding(vf_netdev); ++ netdev_lock_ops(vf_netdev); + netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE); ++ netdev_unlock_ops(vf_netdev); + __netvsc_vf_setup(net, vf_netdev); + break; + } +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -9863,6 +9863,7 @@ int netif_xdp_propagate(struct net_devic + + return dev->netdev_ops->ndo_bpf(dev, bpf); + } ++EXPORT_SYMBOL_GPL(netif_xdp_propagate); + + u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) + { diff --git 
a/debian/patches/patchset-pf/fixes/0032-net-clear-the-dst-when-changing-skb-protocol.patch b/debian/patches/patchset-pf/fixes/0032-net-clear-the-dst-when-changing-skb-protocol.patch new file mode 100644 index 0000000..370069a --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0032-net-clear-the-dst-when-changing-skb-protocol.patch @@ -0,0 +1,113 @@ +From 0f48fca427618cecf6683fa8e46cb8d0b66bb93d Mon Sep 17 00:00:00 2001 +From: Jakub Kicinski +Date: Mon, 9 Jun 2025 17:12:44 -0700 +Subject: net: clear the dst when changing skb protocol +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +A not-so-careful NAT46 BPF program can crash the kernel +if it indiscriminately flips ingress packets from v4 to v6: + + BUG: kernel NULL pointer dereference, address: 0000000000000000 + ip6_rcv_core (net/ipv6/ip6_input.c:190:20) + ipv6_rcv (net/ipv6/ip6_input.c:306:8) + process_backlog (net/core/dev.c:6186:4) + napi_poll (net/core/dev.c:6906:9) + net_rx_action (net/core/dev.c:7028:13) + do_softirq (kernel/softirq.c:462:3) + netif_rx (net/core/dev.c:5326:3) + dev_loopback_xmit (net/core/dev.c:4015:2) + ip_mc_finish_output (net/ipv4/ip_output.c:363:8) + NF_HOOK (./include/linux/netfilter.h:314:9) + ip_mc_output (net/ipv4/ip_output.c:400:5) + dst_output (./include/net/dst.h:459:9) + ip_local_out (net/ipv4/ip_output.c:130:9) + ip_send_skb (net/ipv4/ip_output.c:1496:8) + udp_send_skb (net/ipv4/udp.c:1040:8) + udp_sendmsg (net/ipv4/udp.c:1328:10) + +The output interface has a 4->6 program attached at ingress. +We try to loop the multicast skb back to the sending socket. +Ingress BPF runs as part of netif_rx(), pushes a valid v6 hdr +and changes skb->protocol to v6. We enter ip6_rcv_core which +tries to use skb_dst(). But the dst is still an IPv4 one left +after IPv4 mcast output. + +Clear the dst in all BPF helpers which change the protocol. +Try to preserve metadata dsts, those may carry non-routing +metadata. 
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Maciej Żenczykowski
+Acked-by: Daniel Borkmann
+Fixes: d219df60a70e ("bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()")
+Fixes: 1b00e0dfe7d0 ("bpf: update skb->protocol in bpf_skb_net_grow")
+Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper")
+Reviewed-by: Willem de Bruijn
+Link: https://patch.msgid.link/20250610001245.1981782-1-kuba@kernel.org
+Signed-off-by: Jakub Kicinski
+---
+ net/core/filter.c | 19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -3232,6 +3232,13 @@ static const struct bpf_func_proto bpf_s
+ .arg1_type = ARG_PTR_TO_CTX,
+ };
+
++static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto)
++{
++ skb->protocol = htons(proto);
++ if (skb_valid_dst(skb))
++ skb_dst_drop(skb);
++}
++
+ static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
+ {
+ /* Caller already did skb_cow() with len as headroom,
+@@ -3328,7 +3335,7 @@ static int bpf_skb_proto_4_to_6(struct s
+ }
+ }
+
+- skb->protocol = htons(ETH_P_IPV6);
++ bpf_skb_change_protocol(skb, ETH_P_IPV6);
+ skb_clear_hash(skb);
+
+ return 0;
+@@ -3358,7 +3365,7 @@ static int bpf_skb_proto_6_to_4(struct s
+ }
+ }
+
+- skb->protocol = htons(ETH_P_IP);
++ bpf_skb_change_protocol(skb, ETH_P_IP);
+ skb_clear_hash(skb);
+
+ return 0;
+@@ -3549,10 +3556,10 @@ static int bpf_skb_net_grow(struct sk_bu
+ /* Match skb->protocol to new outer l3 protocol */
+ if (skb->protocol == htons(ETH_P_IP) &&
+ flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+- skb->protocol = htons(ETH_P_IPV6);
++ bpf_skb_change_protocol(skb, ETH_P_IPV6);
+ else if (skb->protocol == htons(ETH_P_IPV6) &&
+ flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
+- skb->protocol = htons(ETH_P_IP);
++ bpf_skb_change_protocol(skb, ETH_P_IP);
+ }
+
+ if (skb_is_gso(skb)) {
+@@ -3605,10 +3612,10 @@ static int bpf_skb_net_shrink(struct sk_
+ /* Match skb->protocol to new outer l3 protocol */
+ if (skb->protocol == htons(ETH_P_IP) &&
+ flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
+- skb->protocol = htons(ETH_P_IPV6);
++ bpf_skb_change_protocol(skb, ETH_P_IPV6);
+ else if (skb->protocol == htons(ETH_P_IPV6) &&
+ flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
+- skb->protocol = htons(ETH_P_IP);
++ bpf_skb_change_protocol(skb, ETH_P_IP);
+
+ if (skb_is_gso(skb)) {
+ struct skb_shared_info *shinfo = skb_shinfo(skb); diff --git a/debian/patches/patchset-pf/fixes/0033-net_sched-sch_sfq-reject-invalid-perturb-period.patch b/debian/patches/patchset-pf/fixes/0033-net_sched-sch_sfq-reject-invalid-perturb-period.patch new file mode 100644 index 0000000..0951490 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0033-net_sched-sch_sfq-reject-invalid-perturb-period.patch @@ -0,0 +1,67 @@ +From 59765af017c206b162b2ceb8d56a171e40a17719 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 11 Jun 2025 08:35:01 +0000 +Subject: net_sched: sch_sfq: reject invalid perturb period + +Gerrard Tai reported that SFQ perturb_period has no range check yet, +and this can be used to trigger a race condition fixed in a separate patch. + +We want to make sure ctl->perturb_period * HZ will not overflow +and is positive. + +Tested: + +tc qd add dev lo root sfq perturb -10 # negative value : error +Error: sch_sfq: invalid perturb period.
+ +tc qd add dev lo root sfq perturb 2000000 # acceptable value +tc -s -d qd sh dev lo +qdisc sfq 8005: root refcnt 2 limit 127p quantum 64Kb depth 127 flows 128 divisor 1024 perturb 2000000sec + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: Gerrard Tai +Signed-off-by: Eric Dumazet +Cc: stable@vger.kernel.org +Link: https://patch.msgid.link/20250611083501.1810459-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +--- + net/sched/sch_sfq.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/net/sched/sch_sfq.c ++++ b/net/sched/sch_sfq.c +@@ -653,6 +653,14 @@ static int sfq_change(struct Qdisc *sch, + NL_SET_ERR_MSG_MOD(extack, "invalid quantum"); + return -EINVAL; + } ++ ++ if (ctl->perturb_period < 0 || ++ ctl->perturb_period > INT_MAX / HZ) { ++ NL_SET_ERR_MSG_MOD(extack, "invalid perturb period"); ++ return -EINVAL; ++ } ++ perturb_period = ctl->perturb_period * HZ; ++ + if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, + ctl_v1->Wlog, ctl_v1->Scell_log, NULL)) + return -EINVAL; +@@ -669,14 +677,12 @@ static int sfq_change(struct Qdisc *sch, + headdrop = q->headdrop; + maxdepth = q->maxdepth; + maxflows = q->maxflows; +- perturb_period = q->perturb_period; + quantum = q->quantum; + flags = q->flags; + + /* update and validate configuration */ + if (ctl->quantum) + quantum = ctl->quantum; +- perturb_period = ctl->perturb_period * HZ; + if (ctl->flows) + maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); + if (ctl->divisor) { diff --git a/debian/patches/patchset-pf/fixes/0034-posix-cpu-timers-fix-race-between-handle_posix_cpu_t.patch b/debian/patches/patchset-pf/fixes/0034-posix-cpu-timers-fix-race-between-handle_posix_cpu_t.patch new file mode 100644 index 0000000..f3b3a02 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0034-posix-cpu-timers-fix-race-between-handle_posix_cpu_t.patch @@ -0,0 +1,51 @@ +From b504e1cd491c55390370059280d5fbaa045d5543 Mon Sep 17 00:00:00 2001 +From: Oleg Nesterov +Date: Fri, 13 Jun 2025 19:26:50 +0200 +Subject: posix-cpu-timers: fix race between handle_posix_cpu_timers() and + posix_cpu_timer_del() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +If an exiting non-autoreaping task has already passed exit_notify() and +calls handle_posix_cpu_timers() from IRQ, it can be reaped by its parent +or debugger right after unlock_task_sighand(). + +If a concurrent posix_cpu_timer_del() runs at that moment, it won't be +able to detect timer->it.cpu.firing != 0: cpu_timer_task_rcu() and/or +lock_task_sighand() will fail. + +Add the tsk->exit_state check into run_posix_cpu_timers() to fix this. + +This fix is not needed if CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y, because +exit_task_work() is called before exit_notify(). But the check still +makes sense, task_work_add(&tsk->posix_cputimers_work.work) will fail +anyway in this case. 
+ +Cc: stable@vger.kernel.org +Reported-by: Benoît Sevens +Fixes: 0bdd2ed4138e ("sched: run_posix_cpu_timers: Don't check ->exit_state, use lock_task_sighand()") +Signed-off-by: Oleg Nesterov +Signed-off-by: Linus Torvalds +--- + kernel/time/posix-cpu-timers.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/kernel/time/posix-cpu-timers.c ++++ b/kernel/time/posix-cpu-timers.c +@@ -1406,6 +1406,15 @@ void run_posix_cpu_timers(void) + lockdep_assert_irqs_disabled(); + + /* ++ * Ensure that release_task(tsk) can't happen while ++ * handle_posix_cpu_timers() is running. Otherwise, a concurrent ++ * posix_cpu_timer_del() may fail to lock_task_sighand(tsk) and ++ * miss timer->it.cpu.firing != 0. ++ */ ++ if (tsk->exit_state) ++ return; ++ ++ /* + * If the actual expiry is deferred to task work context and the + * work is already scheduled there is no point to do anything here. + */ diff --git a/debian/patches/patchset-pf/fixes/0035-mm-vma-reset-VMA-iterator-on-commit_merge-OOM-failur.patch b/debian/patches/patchset-pf/fixes/0035-mm-vma-reset-VMA-iterator-on-commit_merge-OOM-failur.patch new file mode 100644 index 0000000..f8be3f5 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0035-mm-vma-reset-VMA-iterator-on-commit_merge-OOM-failur.patch @@ -0,0 +1,93 @@ +From d7b5f2aa34c56bd2a2d3cda2a7eb7aeb24df6179 Mon Sep 17 00:00:00 2001 +From: Lorenzo Stoakes +Date: Fri, 6 Jun 2025 13:50:32 +0100 +Subject: mm/vma: reset VMA iterator on commit_merge() OOM failure + +While an OOM failure in commit_merge() isn't really feasible due to the +allocation which might fail (a maple tree pre-allocation) being 'too small +to fail', we do need to handle this case correctly regardless. + +In vma_merge_existing_range(), we can theoretically encounter failures +which result in an OOM error in two ways - firstly dup_anon_vma() might +fail with an OOM error, and secondly commit_merge() failing, ultimately, +to pre-allocate a maple tree node. + +The abort logic for dup_anon_vma() resets the VMA iterator to the initial +range, ensuring that any logic looping on this iterator will correctly +proceed to the next VMA. + +However the commit_merge() abort logic does not do the same thing. This +resulted in a syzbot report occurring because mlockall() iterates through +VMAs, is tolerant of errors, but ended up with an incorrect previous VMA +being specified due to incorrect iterator state. + +While making this change, it became apparent we are duplicating logic - +the logic introduced in commit 41e6ddcaa0f1 ("mm/vma: add give_up_on_oom +option on modify/merge, use in uffd release") duplicates the +vmg->give_up_on_oom check in both abort branches. + +Additionally, we observe that we can perform the anon_dup check safely on +dup_anon_vma() failure, as this will not be modified should this call +fail. + +Finally, we need to reset the iterator in both cases, so now we can simply +use the exact same code to abort for both. + +We remove the VM_WARN_ON(err != -ENOMEM) as it would be silly for this to +be otherwise and it allows us to implement the abort check more neatly. + +Link: https://lkml.kernel.org/r/20250606125032.164249-1-lorenzo.stoakes@oracle.com +Fixes: 47b16d0462a4 ("mm: abort vma_modify() on merge out of memory failure") +Signed-off-by: Lorenzo Stoakes +Reported-by: syzbot+d16409ea9ecc16ed261a@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/linux-mm/6842cc67.a00a0220.29ac89.003b.GAE@google.com/ +Reviewed-by: Pedro Falcato +Reviewed-by: Vlastimil Babka +Reviewed-by: Liam R. 
Howlett +Cc: Jann Horn +Cc: +Signed-off-by: Andrew Morton +--- + mm/vma.c | 22 ++++------------------ + 1 file changed, 4 insertions(+), 18 deletions(-) + +--- a/mm/vma.c ++++ b/mm/vma.c +@@ -927,26 +927,9 @@ static __must_check struct vm_area_struc + err = dup_anon_vma(next, middle, &anon_dup); + } + +- if (err) ++ if (err || commit_merge(vmg)) + goto abort; + +- err = commit_merge(vmg); +- if (err) { +- VM_WARN_ON(err != -ENOMEM); +- +- if (anon_dup) +- unlink_anon_vmas(anon_dup); +- +- /* +- * We've cleaned up any cloned anon_vma's, no VMAs have been +- * modified, no harm no foul if the user requests that we not +- * report this and just give up, leaving the VMAs unmerged. +- */ +- if (!vmg->give_up_on_oom) +- vmg->state = VMA_MERGE_ERROR_NOMEM; +- return NULL; +- } +- + khugepaged_enter_vma(vmg->target, vmg->flags); + vmg->state = VMA_MERGE_SUCCESS; + return vmg->target; +@@ -955,6 +938,9 @@ abort: + vma_iter_set(vmg->vmi, start); + vma_iter_load(vmg->vmi); + ++ if (anon_dup) ++ unlink_anon_vmas(anon_dup); ++ + /* + * This means we have failed to clone anon_vma's correctly, but no + * actual changes to VMAs have occurred, so no harm no foul - if the diff --git a/debian/patches/patchset-pf/fixes/0036-mm-close-theoretical-race-where-stale-TLB-entries-co.patch b/debian/patches/patchset-pf/fixes/0036-mm-close-theoretical-race-where-stale-TLB-entries-co.patch new file mode 100644 index 0000000..1b23c40 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0036-mm-close-theoretical-race-where-stale-TLB-entries-co.patch @@ -0,0 +1,90 @@ +From db96fe27668a3bb56fa5d745d1c2eed49a95a56f Mon Sep 17 00:00:00 2001 +From: Ryan Roberts +Date: Fri, 6 Jun 2025 10:28:07 +0100 +Subject: mm: close theoretical race where stale TLB entries could linger + +Commit 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a +parallel reclaim leaving stale TLB entries") described a theoretical race +as such: + + +""" +Nadav Amit identified a theoretical race between page reclaim and mprotect +due to TLB flushes being batched outside of the PTL being held. + +He described the race as follows: + + CPU0 CPU1 + ---- ---- + user accesses memory using RW PTE + [PTE now cached in TLB] + try_to_unmap_one() + ==> ptep_get_and_clear() + ==> set_tlb_ubc_flush_pending() + mprotect(addr, PROT_READ) + ==> change_pte_range() + ==> [ PTE non-present - no flush ] + + user writes using cached RW PTE + ... + + try_to_unmap_flush() + +The same type of race exists for reads when protecting for PROT_NONE and +also exists for operations that can leave an old TLB entry behind such as +munmap, mremap and madvise. +""" + +The solution was to introduce flush_tlb_batched_pending() and call it +under the PTL from mprotect/madvise/munmap/mremap to complete any pending +tlb flushes. + +However, while madvise_free_pte_range() and +madvise_cold_or_pageout_pte_range() were both retro-fitted to call +flush_tlb_batched_pending() immediately after initially acquiring the PTL, +they both temporarily release the PTL to split a large folio if they +stumble upon one. In this case, where re-acquiring the PTL +flush_tlb_batched_pending() must be called again, but it previously was +not. Let's fix that. + +There are 2 Fixes: tags here: the first is the commit that fixed +madvise_free_pte_range(). The second is the commit that added +madvise_cold_or_pageout_pte_range(), which looks like it copy/pasted the +faulty pattern from madvise_free_pte_range(). + +This is a theoretical bug discovered during code review. 
+ +Link: https://lkml.kernel.org/r/20250606092809.4194056-1-ryan.roberts@arm.com +Fixes: 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries") +Fixes: 9c276cc65a58 ("mm: introduce MADV_COLD") +Signed-off-by: Ryan Roberts +Reviewed-by: Jann Horn +Acked-by: David Hildenbrand +Cc: Liam Howlett +Cc: Lorenzo Stoakes +Cc: Mel Gorman +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +--- + mm/madvise.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/mm/madvise.c ++++ b/mm/madvise.c +@@ -503,6 +503,7 @@ restart: + pte_offset_map_lock(mm, pmd, addr, &ptl); + if (!start_pte) + break; ++ flush_tlb_batched_pending(mm); + arch_enter_lazy_mmu_mode(); + if (!err) + nr = 0; +@@ -736,6 +737,7 @@ static int madvise_free_pte_range(pmd_t + start_pte = pte; + if (!start_pte) + break; ++ flush_tlb_batched_pending(mm); + arch_enter_lazy_mmu_mode(); + if (!err) + nr = 0; diff --git a/debian/patches/patchset-pf/fixes/0037-io_uring-kbuf-don-t-truncate-end-buffer-for-multiple.patch b/debian/patches/patchset-pf/fixes/0037-io_uring-kbuf-don-t-truncate-end-buffer-for-multiple.patch new file mode 100644 index 0000000..d652c23 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0037-io_uring-kbuf-don-t-truncate-end-buffer-for-multiple.patch @@ -0,0 +1,33 @@ +From f8c6b0801edd6f50057610c67120ffb42027f2c2 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Fri, 13 Jun 2025 11:01:49 -0600 +Subject: io_uring/kbuf: don't truncate end buffer for multiple buffer peeks + +If peeking a bunch of buffers, normally io_ring_buffers_peek() will +truncate the end buffer. This isn't optimal as presumably more data will +be arriving later, and hence it's better to stop with the last full +buffer rather than truncate the end buffer. + +Cc: stable@vger.kernel.org +Fixes: 35c8711c8fc4 ("io_uring/kbuf: add helpers for getting/peeking multiple buffers") +Reported-by: Christian Mazakas +Signed-off-by: Jens Axboe +--- + io_uring/kbuf.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -270,8 +270,11 @@ static int io_ring_buffers_peek(struct i + /* truncate end piece, if needed, for non partial buffers */ + if (len > arg->max_len) { + len = arg->max_len; +- if (!(bl->flags & IOBL_INC)) ++ if (!(bl->flags & IOBL_INC)) { ++ if (iov != arg->iovs) ++ break; + buf->len = len; ++ } + } + + iov->iov_base = u64_to_user_ptr(buf->addr); diff --git a/debian/patches/patchset-pf/fixes/0038-nvme-always-punt-polled-uring_cmd-end_io-work-to-tas.patch b/debian/patches/patchset-pf/fixes/0038-nvme-always-punt-polled-uring_cmd-end_io-work-to-tas.patch new file mode 100644 index 0000000..853e609 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0038-nvme-always-punt-polled-uring_cmd-end_io-work-to-tas.patch @@ -0,0 +1,54 @@ +From a2ef8773db38d0c3a41761dbed6fc57afa440161 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Fri, 13 Jun 2025 13:37:41 -0600 +Subject: nvme: always punt polled uring_cmd end_io work to task_work + +Currently NVMe uring_cmd completions will complete locally, if they are +polled. This is done because those completions are always invoked from +task context. And while that is true, there's no guarantee that it's +invoked under the right ring context, or even task. If someone does +NVMe passthrough via multiple threads and with a limited number of +poll queues, then ringA may find completions from ringB. For that case, +completing the request may not be sound. 
+ +Always just punt the passthrough completions via task_work, which will +redirect the completion, if needed. + +Cc: stable@vger.kernel.org +Fixes: 585079b6e425 ("nvme: wire up async polling for io passthrough commands") +Signed-off-by: Jens Axboe +--- + drivers/nvme/host/ioctl.c | 21 +++++++-------------- + 1 file changed, 7 insertions(+), 14 deletions(-) + +--- a/drivers/nvme/host/ioctl.c ++++ b/drivers/nvme/host/ioctl.c +@@ -429,21 +429,14 @@ static enum rq_end_io_ret nvme_uring_cmd + pdu->result = le64_to_cpu(nvme_req(req)->result.u64); + + /* +- * For iopoll, complete it directly. Note that using the uring_cmd +- * helper for this is safe only because we check blk_rq_is_poll(). +- * As that returns false if we're NOT on a polled queue, then it's +- * safe to use the polled completion helper. +- * +- * Otherwise, move the completion to task work. ++ * IOPOLL could potentially complete this request directly, but ++ * if multiple rings are polling on the same queue, then it's possible ++ * for one ring to find completions for another ring. Punting the ++ * completion via task_work will always direct it to the right ++ * location, rather than potentially complete requests for ringA ++ * under iopoll invocations from ringB. + */ +- if (blk_rq_is_poll(req)) { +- if (pdu->bio) +- blk_rq_unmap_user(pdu->bio); +- io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status); +- } else { +- io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); +- } +- ++ io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); + return RQ_END_IO_FREE; + } + diff --git a/debian/patches/patchset-pf/fixes/0039-block-Clear-BIO_EMULATES_ZONE_APPEND-flag-on-BIO-com.patch b/debian/patches/patchset-pf/fixes/0039-block-Clear-BIO_EMULATES_ZONE_APPEND-flag-on-BIO-com.patch new file mode 100644 index 0000000..b7c44a6 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0039-block-Clear-BIO_EMULATES_ZONE_APPEND-flag-on-BIO-com.patch @@ -0,0 +1,33 @@ +From bb51adf56b5adc7075252cd17136c2288c116602 Mon Sep 17 00:00:00 2001 +From: Damien Le Moal +Date: Wed, 11 Jun 2025 09:59:15 +0900 +Subject: block: Clear BIO_EMULATES_ZONE_APPEND flag on BIO completion + +When blk_zone_write_plug_bio_endio() is called for a regular write BIO +used to emulate a zone append operation, that is, a BIO flagged with +BIO_EMULATES_ZONE_APPEND, the BIO operation code is restored to the +original REQ_OP_ZONE_APPEND but the BIO_EMULATES_ZONE_APPEND flag is not +cleared. Clear it to fully return the BIO to its orginal definition. 
+ +Fixes: 9b1ce7f0c6f8 ("block: Implement zone append emulation") +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Reviewed-by: Johannes Thumshirn +Reviewed-by: Hannes Reinecke +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20250611005915.89843-1-dlemoal@kernel.org +Signed-off-by: Jens Axboe +--- + block/blk-zoned.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/block/blk-zoned.c ++++ b/block/blk-zoned.c +@@ -1225,6 +1225,7 @@ void blk_zone_write_plug_bio_endio(struc + if (bio_flagged(bio, BIO_EMULATES_ZONE_APPEND)) { + bio->bi_opf &= ~REQ_OP_MASK; + bio->bi_opf |= REQ_OP_ZONE_APPEND; ++ bio_clear_flag(bio, BIO_EMULATES_ZONE_APPEND); + } + + /* diff --git a/debian/patches/patchset-pf/fixes/0040-block-use-plug-request-list-tail-for-one-shot-backme.patch b/debian/patches/patchset-pf/fixes/0040-block-use-plug-request-list-tail-for-one-shot-backme.patch new file mode 100644 index 0000000..ba0cd26 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0040-block-use-plug-request-list-tail-for-one-shot-backme.patch @@ -0,0 +1,65 @@ +From 56ae62470a95ac8249c43f5c0d50da2a83c350e0 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Wed, 11 Jun 2025 08:48:46 -0600 +Subject: block: use plug request list tail for one-shot backmerge attempt + +Previously, the block layer stored the requests in the plug list in +LIFO order. For this reason, blk_attempt_plug_merge() would check +just the head entry for a back merge attempt, and abort after that +unless requests for multiple queues existed in the plug list. If more +than one request is present in the plug list, this makes the one-shot +back merging less useful than before, as it'll always fail to find a +quick merge candidate. + +Use the tail entry for the one-shot merge attempt, which is the last +added request in the list. If that fails, abort immediately unless +there are multiple queues available. If multiple queues are available, +then scan the list. Ideally the latter scan would be a backwards scan +of the list, but as it currently stands, the plug list is singly linked +and hence this isn't easily feasible. 
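The reworked control flow can be modelled outside the kernel. The sketch below uses simplified stand-in types rather than real requests and bios (illustrative only, not part of the patch): a one-shot attempt against the most recently added entry, an early bail-out when only one queue is plugged, and a forward scan otherwise.

/* Simplified, self-contained model of the plug-merge control flow. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct rq {
	int q;			/* stand-in for the owning request queue */
	struct rq *next;
};

struct plug {
	struct rq *head;
	struct rq *tail;	/* singly linked, so no backwards walk */
	bool multiple_queues;
};

static bool try_merge(const struct rq *rq)
{
	(void)rq;		/* the real code attempts a bio back-merge here */
	return true;
}

static bool attempt_plug_merge(const struct plug *plug, int q)
{
	const struct rq *rq;

	if (!plug || !plug->tail)
		return false;

	/* One-shot attempt against the last added request. */
	if (plug->tail->q == q)
		return try_merge(plug->tail);
	if (!plug->multiple_queues)
		return false;

	/* Requests for several queues are plugged: scan the whole list. */
	for (rq = plug->head; rq; rq = rq->next) {
		if (rq->q == q)
			return try_merge(rq);
	}
	return false;
}

int main(void)
{
	struct rq a = { .q = 1 }, b = { .q = 2 };
	struct plug plug = { .head = &a, .tail = &b, .multiple_queues = true };

	a.next = &b;
	printf("merge into q2 (tail hit): %d\n", attempt_plug_merge(&plug, 2));
	printf("merge into q1 (via scan): %d\n", attempt_plug_merge(&plug, 1));
	printf("merge into q3 (no match): %d\n", attempt_plug_merge(&plug, 3));
	return 0;
}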
+ +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/linux-block/20250611121626.7252-1-abuehaze@amazon.com/ +Reported-by: Hazem Mohamed Abuelfotoh +Fixes: e70c301faece ("block: don't reorder requests in blk_add_rq_to_plug") +Signed-off-by: Jens Axboe +--- + block/blk-merge.c | 26 +++++++++++++------------- + 1 file changed, 13 insertions(+), 13 deletions(-) + +--- a/block/blk-merge.c ++++ b/block/blk-merge.c +@@ -1127,20 +1127,20 @@ bool blk_attempt_plug_merge(struct reque + if (!plug || rq_list_empty(&plug->mq_list)) + return false; + +- rq_list_for_each(&plug->mq_list, rq) { +- if (rq->q == q) { +- if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == +- BIO_MERGE_OK) +- return true; +- break; +- } ++ rq = plug->mq_list.tail; ++ if (rq->q == q) ++ return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == ++ BIO_MERGE_OK; ++ else if (!plug->multiple_queues) ++ return false; + +- /* +- * Only keep iterating plug list for merges if we have multiple +- * queues +- */ +- if (!plug->multiple_queues) +- break; ++ rq_list_for_each(&plug->mq_list, rq) { ++ if (rq->q != q) ++ continue; ++ if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == ++ BIO_MERGE_OK) ++ return true; ++ break; + } + return false; + } diff --git a/debian/patches/patchset-pf/invlpgb/0001-x86-mm-Remove-pv_ops.mmu.tlb_remove_table-call.patch b/debian/patches/patchset-pf/invlpgb/0001-x86-mm-Remove-pv_ops.mmu.tlb_remove_table-call.patch deleted file mode 100644 index 272a209..0000000 --- a/debian/patches/patchset-pf/invlpgb/0001-x86-mm-Remove-pv_ops.mmu.tlb_remove_table-call.patch +++ /dev/null @@ -1,89 +0,0 @@ -From aadea0887cca5739137f109eab0e1b38604c8af8 Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Thu, 13 Feb 2025 11:13:53 -0500 -Subject: x86/mm: Remove pv_ops.mmu.tlb_remove_table call - -Every pv_ops.mmu.tlb_remove_table call ends up calling tlb_remove_table. - -Get rid of the indirection by simply calling tlb_remove_table directly, -and not going through the paravirt function pointers. 
- -Suggested-by: Qi Zheng -Signed-off-by: Rik van Riel -Signed-off-by: Ingo Molnar -Tested-by: Manali Shukla -Tested-by: Brendan Jackman -Tested-by: Michael Kelley -Link: https://lore.kernel.org/r/20250213161423.449435-3-riel@surriel.com ---- - arch/x86/hyperv/mmu.c | 1 - - arch/x86/include/asm/paravirt.h | 5 ----- - arch/x86/include/asm/paravirt_types.h | 2 -- - arch/x86/kernel/kvm.c | 1 - - arch/x86/kernel/paravirt.c | 1 - - arch/x86/xen/mmu_pv.c | 1 - - 6 files changed, 11 deletions(-) - ---- a/arch/x86/hyperv/mmu.c -+++ b/arch/x86/hyperv/mmu.c -@@ -239,5 +239,4 @@ void hyperv_setup_mmu_ops(void) - - pr_info("Using hypercall for remote TLB flush\n"); - pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi; -- pv_ops.mmu.tlb_remove_table = tlb_remove_table; - } ---- a/arch/x86/include/asm/paravirt.h -+++ b/arch/x86/include/asm/paravirt.h -@@ -91,11 +91,6 @@ static inline void __flush_tlb_multi(con - PVOP_VCALL2(mmu.flush_tlb_multi, cpumask, info); - } - --static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) --{ -- PVOP_VCALL2(mmu.tlb_remove_table, tlb, table); --} -- - static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) - { - PVOP_VCALL1(mmu.exit_mmap, mm); ---- a/arch/x86/include/asm/paravirt_types.h -+++ b/arch/x86/include/asm/paravirt_types.h -@@ -133,8 +133,6 @@ struct pv_mmu_ops { - void (*flush_tlb_multi)(const struct cpumask *cpus, - const struct flush_tlb_info *info); - -- void (*tlb_remove_table)(struct mmu_gather *tlb, void *table); -- - /* Hook for intercepting the destruction of an mm_struct. */ - void (*exit_mmap)(struct mm_struct *mm); - void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc); ---- a/arch/x86/kernel/kvm.c -+++ b/arch/x86/kernel/kvm.c -@@ -838,7 +838,6 @@ static void __init kvm_guest_init(void) - #ifdef CONFIG_SMP - if (pv_tlb_flush_supported()) { - pv_ops.mmu.flush_tlb_multi = kvm_flush_tlb_multi; -- pv_ops.mmu.tlb_remove_table = tlb_remove_table; - pr_info("KVM setup pv remote TLB flush\n"); - } - ---- a/arch/x86/kernel/paravirt.c -+++ b/arch/x86/kernel/paravirt.c -@@ -182,7 +182,6 @@ struct paravirt_patch_template pv_ops = - .mmu.flush_tlb_kernel = native_flush_tlb_global, - .mmu.flush_tlb_one_user = native_flush_tlb_one_user, - .mmu.flush_tlb_multi = native_flush_tlb_multi, -- .mmu.tlb_remove_table = tlb_remove_table, - - .mmu.exit_mmap = paravirt_nop, - .mmu.notify_page_enc_status_changed = paravirt_nop, ---- a/arch/x86/xen/mmu_pv.c -+++ b/arch/x86/xen/mmu_pv.c -@@ -2189,7 +2189,6 @@ static const typeof(pv_ops) xen_mmu_ops - .flush_tlb_kernel = xen_flush_tlb, - .flush_tlb_one_user = xen_flush_tlb_one_user, - .flush_tlb_multi = xen_flush_tlb_multi, -- .tlb_remove_table = tlb_remove_table, - - .pgd_alloc = xen_pgd_alloc, - .pgd_free = xen_pgd_free, diff --git a/debian/patches/patchset-pf/invlpgb/0002-x86-mm-Consolidate-full-flush-threshold-decision.patch b/debian/patches/patchset-pf/invlpgb/0002-x86-mm-Consolidate-full-flush-threshold-decision.patch deleted file mode 100644 index bbcdd0b..0000000 --- a/debian/patches/patchset-pf/invlpgb/0002-x86-mm-Consolidate-full-flush-threshold-decision.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 170f37d1499a28f7a1902e007111867c7cf0147f Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Tue, 25 Feb 2025 22:00:36 -0500 -Subject: x86/mm: Consolidate full flush threshold decision - -Reduce code duplication by consolidating the decision point for whether to do -individual invalidations or a full flush inside get_flush_tlb_info(). 
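The decision itself is a single comparison against a page-count ceiling. The standalone sketch below (using 33 pages, the x86 default at the time of writing; it is not taken from the kernel sources) shows how a requested range either stays as individual invalidations or is widened to a full flush:

/* Standalone model of the full-flush threshold decision; illustrative only. */
#include <stdio.h>

#define PAGE_SHIFT	12
#define TLB_FLUSH_ALL	(~0UL)

static unsigned long tlb_single_page_flush_ceiling = 33;	/* x86 default */

static void decide_flush(unsigned long start, unsigned long end,
			 unsigned int stride_shift)
{
	unsigned long nr = (end - start) >> stride_shift;

	if (nr > tlb_single_page_flush_ceiling) {
		start = 0;
		end = TLB_FLUSH_ALL;
	}

	if (end == TLB_FLUSH_ALL)
		printf("%3lu pages -> full TLB flush\n", nr);
	else
		printf("%3lu pages -> %lu individual invalidations\n", nr, nr);
}

int main(void)
{
	unsigned long base = 0x7f0000000000UL;

	decide_flush(base, base + (16UL << PAGE_SHIFT), PAGE_SHIFT);
	decide_flush(base, base + (64UL << PAGE_SHIFT), PAGE_SHIFT);
	return 0;
}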
- -Suggested-by: Dave Hansen -Signed-off-by: Rik van Riel -Signed-off-by: Borislav Petkov (AMD) -Reviewed-by: Borislav Petkov (AMD) -Acked-by: Dave Hansen -Link: https://lore.kernel.org/r/20250226030129.530345-2-riel@surriel.com ---- - arch/x86/mm/tlb.c | 41 +++++++++++++++++++---------------------- - 1 file changed, 19 insertions(+), 22 deletions(-) - ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -1019,6 +1019,15 @@ static struct flush_tlb_info *get_flush_ - BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1); - #endif - -+ /* -+ * If the number of flushes is so large that a full flush -+ * would be faster, do a full flush. -+ */ -+ if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) { -+ start = 0; -+ end = TLB_FLUSH_ALL; -+ } -+ - info->start = start; - info->end = end; - info->mm = mm; -@@ -1045,17 +1054,8 @@ void flush_tlb_mm_range(struct mm_struct - bool freed_tables) - { - struct flush_tlb_info *info; -+ int cpu = get_cpu(); - u64 new_tlb_gen; -- int cpu; -- -- cpu = get_cpu(); -- -- /* Should we flush just the requested range? */ -- if ((end == TLB_FLUSH_ALL) || -- ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) { -- start = 0; -- end = TLB_FLUSH_ALL; -- } - - /* This is also a barrier that synchronizes with switch_mm(). */ - new_tlb_gen = inc_mm_tlb_gen(mm); -@@ -1108,22 +1108,19 @@ static void do_kernel_range_flush(void * - - void flush_tlb_kernel_range(unsigned long start, unsigned long end) - { -- /* Balance as user space task's flush, a bit conservative */ -- if (end == TLB_FLUSH_ALL || -- (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) { -- on_each_cpu(do_flush_tlb_all, NULL, 1); -- } else { -- struct flush_tlb_info *info; -+ struct flush_tlb_info *info; - -- preempt_disable(); -- info = get_flush_tlb_info(NULL, start, end, 0, false, -- TLB_GENERATION_INVALID); -+ guard(preempt)(); - -+ info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false, -+ TLB_GENERATION_INVALID); -+ -+ if (info->end == TLB_FLUSH_ALL) -+ on_each_cpu(do_flush_tlb_all, NULL, 1); -+ else - on_each_cpu(do_kernel_range_flush, info, 1); - -- put_flush_tlb_info(); -- preempt_enable(); -- } -+ put_flush_tlb_info(); - } - - /* diff --git a/debian/patches/patchset-pf/invlpgb/0003-x86-mm-Add-INVLPGB-feature-and-Kconfig-entry.patch b/debian/patches/patchset-pf/invlpgb/0003-x86-mm-Add-INVLPGB-feature-and-Kconfig-entry.patch deleted file mode 100644 index 0829775..0000000 --- a/debian/patches/patchset-pf/invlpgb/0003-x86-mm-Add-INVLPGB-feature-and-Kconfig-entry.patch +++ /dev/null @@ -1,103 +0,0 @@ -From acb5a284db4fa3dbbb246ab8fa58da0143cd68ce Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Tue, 25 Feb 2025 22:00:37 -0500 -Subject: x86/mm: Add INVLPGB feature and Kconfig entry - -In addition, the CPU advertises the maximum number of pages that can be -shot down with one INVLPGB instruction in CPUID. Save that information -for later use. - - [ bp: use cpu_has(), typos, massage. 
] - -Signed-off-by: Rik van Riel -Signed-off-by: Borislav Petkov (AMD) -Link: https://lore.kernel.org/r/20250226030129.530345-3-riel@surriel.com ---- - arch/x86/Kconfig.cpu | 4 ++++ - arch/x86/include/asm/cpufeatures.h | 1 + - arch/x86/include/asm/disabled-features.h | 8 +++++++- - arch/x86/include/asm/tlbflush.h | 3 +++ - arch/x86/kernel/cpu/amd.c | 6 ++++++ - 5 files changed, 21 insertions(+), 1 deletion(-) - ---- a/arch/x86/Kconfig.cpu -+++ b/arch/x86/Kconfig.cpu -@@ -740,6 +740,10 @@ menuconfig PROCESSOR_SELECT - This lets you choose what x86 vendor support code your kernel - will include. - -+config BROADCAST_TLB_FLUSH -+ def_bool y -+ depends on CPU_SUP_AMD && 64BIT -+ - config CPU_SUP_INTEL - default y - bool "Support Intel processors" if PROCESSOR_SELECT ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -338,6 +338,7 @@ - #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ - #define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ - #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ -+#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instructions supported */ - #define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ - #define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ - #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ ---- a/arch/x86/include/asm/disabled-features.h -+++ b/arch/x86/include/asm/disabled-features.h -@@ -129,6 +129,12 @@ - #define DISABLE_SEV_SNP (1 << (X86_FEATURE_SEV_SNP & 31)) - #endif - -+#ifdef CONFIG_BROADCAST_TLB_FLUSH -+#define DISABLE_INVLPGB 0 -+#else -+#define DISABLE_INVLPGB (1 << (X86_FEATURE_INVLPGB & 31)) -+#endif -+ - /* - * Make sure to add features to the correct mask - */ -@@ -146,7 +152,7 @@ - #define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \ - DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK) - #define DISABLED_MASK12 (DISABLE_FRED|DISABLE_LAM) --#define DISABLED_MASK13 0 -+#define DISABLED_MASK13 (DISABLE_INVLPGB) - #define DISABLED_MASK14 0 - #define DISABLED_MASK15 0 - #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \ ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -183,6 +183,9 @@ static inline void cr4_init_shadow(void) - extern unsigned long mmu_cr4_features; - extern u32 *trampoline_cr4_features; - -+/* How many pages can be invalidated with one INVLPGB. 
*/ -+extern u16 invlpgb_count_max; -+ - extern void initialize_tlbstate_and_flush(void); - - /* ---- a/arch/x86/kernel/cpu/amd.c -+++ b/arch/x86/kernel/cpu/amd.c -@@ -29,6 +29,8 @@ - - #include "cpu.h" - -+u16 invlpgb_count_max __ro_after_init; -+ - static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) - { - u32 gprs[8] = { 0 }; -@@ -1145,6 +1147,10 @@ static void cpu_detect_tlb_amd(struct cp - tlb_lli_2m[ENTRIES] = eax & mask; - - tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; -+ -+ /* Max number of pages INVLPGB can invalidate in one shot */ -+ if (cpu_has(c, X86_FEATURE_INVLPGB)) -+ invlpgb_count_max = (cpuid_edx(0x80000008) & 0xffff) + 1; - } - - static const struct cpu_dev amd_cpu_dev = { diff --git a/debian/patches/patchset-pf/invlpgb/0004-x86-mm-Add-INVLPGB-support-code.patch b/debian/patches/patchset-pf/invlpgb/0004-x86-mm-Add-INVLPGB-support-code.patch deleted file mode 100644 index ccd1eef..0000000 --- a/debian/patches/patchset-pf/invlpgb/0004-x86-mm-Add-INVLPGB-support-code.patch +++ /dev/null @@ -1,170 +0,0 @@ -From 27bab4a6ed6ee7b7b0e2d216b8802800ef26b2ad Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Fri, 28 Feb 2025 20:32:30 +0100 -Subject: x86/mm: Add INVLPGB support code - -Add helper functions and definitions needed to use broadcast TLB -invalidation on AMD CPUs. - - [ bp: - - Cleanup commit message - - Improve and expand comments - - push the preemption guards inside the invlpgb* helpers - - merge improvements from dhansen - - add !CONFIG_BROADCAST_TLB_FLUSH function stubs because Clang - can't do DCE properly yet and looks at the inline asm and - complains about it getting a u64 argument on 32-bit code ] - -Signed-off-by: Rik van Riel -Signed-off-by: Borislav Petkov (AMD) -Link: https://lore.kernel.org/r/20250226030129.530345-4-riel@surriel.com ---- - arch/x86/include/asm/tlb.h | 132 +++++++++++++++++++++++++++++++++++++ - 1 file changed, 132 insertions(+) - ---- a/arch/x86/include/asm/tlb.h -+++ b/arch/x86/include/asm/tlb.h -@@ -6,6 +6,9 @@ - static inline void tlb_flush(struct mmu_gather *tlb); - - #include -+#include -+#include -+#include - - static inline void tlb_flush(struct mmu_gather *tlb) - { -@@ -25,4 +28,133 @@ static inline void invlpg(unsigned long - asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); - } - -+enum addr_stride { -+ PTE_STRIDE = 0, -+ PMD_STRIDE = 1 -+}; -+ -+#ifdef CONFIG_BROADCAST_TLB_FLUSH -+/* -+ * INVLPGB does broadcast TLB invalidation across all the CPUs in the system. -+ * -+ * The INVLPGB instruction is weakly ordered, and a batch of invalidations can -+ * be done in a parallel fashion. -+ * -+ * The instruction takes the number of extra pages to invalidate, beyond -+ * the first page, while __invlpgb gets the more human readable number of -+ * pages to invalidate. -+ * -+ * The bits in rax[0:2] determine respectively which components of the address -+ * (VA, PCID, ASID) get compared when flushing. If neither bits are set, *any* -+ * address in the specified range matches. -+ * -+ * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from -+ * this CPU have completed. -+ */ -+static inline void __invlpgb(unsigned long asid, unsigned long pcid, -+ unsigned long addr, u16 nr_pages, -+ enum addr_stride stride, u8 flags) -+{ -+ u32 edx = (pcid << 16) | asid; -+ u32 ecx = (stride << 31) | (nr_pages - 1); -+ u64 rax = addr | flags; -+ -+ /* The low bits in rax are for flags. Verify addr is clean. */ -+ VM_WARN_ON_ONCE(addr & ~PAGE_MASK); -+ -+ /* INVLPGB; supported in binutils >= 2.36. 
*/ -+ asm volatile(".byte 0x0f, 0x01, 0xfe" :: "a" (rax), "c" (ecx), "d" (edx)); -+} -+ -+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags) -+{ -+ __invlpgb(asid, pcid, 0, 1, 0, flags); -+} -+ -+static inline void __tlbsync(void) -+{ -+ /* -+ * TLBSYNC waits for INVLPGB instructions originating on the same CPU -+ * to have completed. Print a warning if the task has been migrated, -+ * and might not be waiting on all the INVLPGBs issued during this TLB -+ * invalidation sequence. -+ */ -+ cant_migrate(); -+ -+ /* TLBSYNC: supported in binutils >= 0.36. */ -+ asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory"); -+} -+#else -+/* Some compilers (I'm looking at you clang!) simply can't do DCE */ -+static inline void __invlpgb(unsigned long asid, unsigned long pcid, -+ unsigned long addr, u16 nr_pages, -+ enum addr_stride s, u8 flags) { } -+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags) { } -+static inline void __tlbsync(void) { } -+#endif -+ -+/* -+ * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination -+ * of the three. For example: -+ * - FLAG_VA | FLAG_INCLUDE_GLOBAL: invalidate all TLB entries at the address -+ * - FLAG_PCID: invalidate all TLB entries matching the PCID -+ * -+ * The first is used to invalidate (kernel) mappings at a particular -+ * address across all processes. -+ * -+ * The latter invalidates all TLB entries matching a PCID. -+ */ -+#define INVLPGB_FLAG_VA BIT(0) -+#define INVLPGB_FLAG_PCID BIT(1) -+#define INVLPGB_FLAG_ASID BIT(2) -+#define INVLPGB_FLAG_INCLUDE_GLOBAL BIT(3) -+#define INVLPGB_FLAG_FINAL_ONLY BIT(4) -+#define INVLPGB_FLAG_INCLUDE_NESTED BIT(5) -+ -+/* The implied mode when all bits are clear: */ -+#define INVLPGB_MODE_ALL_NONGLOBALS 0UL -+ -+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid, -+ unsigned long addr, -+ u16 nr, bool stride) -+{ -+ enum addr_stride str = stride ? PMD_STRIDE : PTE_STRIDE; -+ u8 flags = INVLPGB_FLAG_PCID | INVLPGB_FLAG_VA; -+ -+ __invlpgb(0, pcid, addr, nr, str, flags); -+} -+ -+/* Flush all mappings for a given PCID, not including globals. */ -+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid) -+{ -+ __invlpgb_all(0, pcid, INVLPGB_FLAG_PCID); -+} -+ -+/* Flush all mappings, including globals, for all PCIDs. */ -+static inline void invlpgb_flush_all(void) -+{ -+ /* -+ * TLBSYNC at the end needs to make sure all flushes done on the -+ * current CPU have been executed system-wide. Therefore, make -+ * sure nothing gets migrated in-between but disable preemption -+ * as it is cheaper. -+ */ -+ guard(preempt)(); -+ __invlpgb_all(0, 0, INVLPGB_FLAG_INCLUDE_GLOBAL); -+ __tlbsync(); -+} -+ -+/* Flush addr, including globals, for all PCIDs. */ -+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr) -+{ -+ __invlpgb(0, 0, addr, nr, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL); -+} -+ -+/* Flush all mappings for all PCIDs except globals. 
*/ -+static inline void invlpgb_flush_all_nonglobals(void) -+{ -+ guard(preempt)(); -+ __invlpgb_all(0, 0, INVLPGB_MODE_ALL_NONGLOBALS); -+ __tlbsync(); -+} - #endif /* _ASM_X86_TLB_H */ diff --git a/debian/patches/patchset-pf/invlpgb/0005-x86-mm-Use-INVLPGB-for-kernel-TLB-flushes.patch b/debian/patches/patchset-pf/invlpgb/0005-x86-mm-Use-INVLPGB-for-kernel-TLB-flushes.patch deleted file mode 100644 index f94c71b..0000000 --- a/debian/patches/patchset-pf/invlpgb/0005-x86-mm-Use-INVLPGB-for-kernel-TLB-flushes.patch +++ /dev/null @@ -1,97 +0,0 @@ -From 358d71638f420efe8f7e05ce74aefe13e9320283 Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Tue, 25 Feb 2025 22:00:39 -0500 -Subject: x86/mm: Use INVLPGB for kernel TLB flushes - -Use broadcast TLB invalidation for kernel addresses when available. -Remove the need to send IPIs for kernel TLB flushes. - - [ bp: Integrate dhansen's comments additions, merge the - flush_tlb_all() change into this one too. ] - -Signed-off-by: Rik van Riel -Signed-off-by: Borislav Petkov (AMD) -Link: https://lore.kernel.org/r/20250226030129.530345-5-riel@surriel.com ---- - arch/x86/mm/tlb.c | 48 +++++++++++++++++++++++++++++++++++++++++++---- - 1 file changed, 44 insertions(+), 4 deletions(-) - ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -1083,7 +1083,6 @@ void flush_tlb_mm_range(struct mm_struct - mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end); - } - -- - static void do_flush_tlb_all(void *info) - { - count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); -@@ -1093,7 +1092,32 @@ static void do_flush_tlb_all(void *info) - void flush_tlb_all(void) - { - count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); -- on_each_cpu(do_flush_tlb_all, NULL, 1); -+ -+ /* First try (faster) hardware-assisted TLB invalidation. */ -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) -+ invlpgb_flush_all(); -+ else -+ /* Fall back to the IPI-based invalidation. */ -+ on_each_cpu(do_flush_tlb_all, NULL, 1); -+} -+ -+/* Flush an arbitrarily large range of memory with INVLPGB. */ -+static void invlpgb_kernel_range_flush(struct flush_tlb_info *info) -+{ -+ unsigned long addr, nr; -+ -+ for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) { -+ nr = (info->end - addr) >> PAGE_SHIFT; -+ -+ /* -+ * INVLPGB has a limit on the size of ranges it can -+ * flush. Break up large flushes. 
-+ */ -+ nr = clamp_val(nr, 1, invlpgb_count_max); -+ -+ invlpgb_flush_addr_nosync(addr, nr); -+ } -+ __tlbsync(); - } - - static void do_kernel_range_flush(void *info) -@@ -1106,6 +1130,22 @@ static void do_kernel_range_flush(void * - flush_tlb_one_kernel(addr); - } - -+static void kernel_tlb_flush_all(struct flush_tlb_info *info) -+{ -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) -+ invlpgb_flush_all(); -+ else -+ on_each_cpu(do_flush_tlb_all, NULL, 1); -+} -+ -+static void kernel_tlb_flush_range(struct flush_tlb_info *info) -+{ -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) -+ invlpgb_kernel_range_flush(info); -+ else -+ on_each_cpu(do_kernel_range_flush, info, 1); -+} -+ - void flush_tlb_kernel_range(unsigned long start, unsigned long end) - { - struct flush_tlb_info *info; -@@ -1116,9 +1156,9 @@ void flush_tlb_kernel_range(unsigned lon - TLB_GENERATION_INVALID); - - if (info->end == TLB_FLUSH_ALL) -- on_each_cpu(do_flush_tlb_all, NULL, 1); -+ kernel_tlb_flush_all(info); - else -- on_each_cpu(do_kernel_range_flush, info, 1); -+ kernel_tlb_flush_range(info); - - put_flush_tlb_info(); - } diff --git a/debian/patches/patchset-pf/invlpgb/0006-x86-mm-Use-broadcast-TLB-flushing-in-page-reclaim.patch b/debian/patches/patchset-pf/invlpgb/0006-x86-mm-Use-broadcast-TLB-flushing-in-page-reclaim.patch deleted file mode 100644 index 37eeb74..0000000 --- a/debian/patches/patchset-pf/invlpgb/0006-x86-mm-Use-broadcast-TLB-flushing-in-page-reclaim.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 7cf099de79e12d6c4949f733c8cbb241bb08f07a Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Tue, 25 Feb 2025 22:00:41 -0500 -Subject: x86/mm: Use broadcast TLB flushing in page reclaim - -Page reclaim tracks only the CPU(s) where the TLB needs to be flushed, rather -than all the individual mappings that may be getting invalidated. - -Use broadcast TLB flushing when that is available. - - [ bp: Massage commit message. ] - -Signed-off-by: Rik van Riel -Signed-off-by: Borislav Petkov (AMD) -Link: https://lore.kernel.org/r/20250226030129.530345-7-riel@surriel.com ---- - arch/x86/mm/tlb.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -1339,7 +1339,9 @@ void arch_tlbbatch_flush(struct arch_tlb - * a local TLB flush is needed. Optimize this use-case by calling - * flush_tlb_func_local() directly in this case. - */ -- if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) { -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { -+ invlpgb_flush_all_nonglobals(); -+ } else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) { - flush_tlb_multi(&batch->cpumask, info); - } else if (cpumask_test_cpu(cpu, &batch->cpumask)) { - lockdep_assert_irqs_enabled(); diff --git a/debian/patches/patchset-pf/invlpgb/0007-x86-mm-Add-global-ASID-allocation-helper-functions.patch b/debian/patches/patchset-pf/invlpgb/0007-x86-mm-Add-global-ASID-allocation-helper-functions.patch deleted file mode 100644 index 6eb5b9f..0000000 --- a/debian/patches/patchset-pf/invlpgb/0007-x86-mm-Add-global-ASID-allocation-helper-functions.patch +++ /dev/null @@ -1,286 +0,0 @@ -From f9ecaaca7ac26789d7d3e0d8022b7c99599dc8a3 Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Tue, 25 Feb 2025 22:00:42 -0500 -Subject: x86/mm: Add global ASID allocation helper functions - -Add functions to manage global ASID space. Multithreaded processes that are -simultaneously active on 4 or more CPUs can get a global ASID, resulting in the -same PCID being used for that process on every CPU. 
- -This in turn will allow the kernel to use hardware-assisted TLB flushing -through AMD INVLPGB or Intel RAR for these processes. - - [ bp: - - Extend use_global_asid() comment - - s/X86_BROADCAST_TLB_FLUSH/BROADCAST_TLB_FLUSH/g - - other touchups ] - -Signed-off-by: Rik van Riel -Signed-off-by: Borislav Petkov (AMD) -Link: https://lore.kernel.org/r/20250226030129.530345-8-riel@surriel.com ---- - arch/x86/include/asm/mmu.h | 12 +++ - arch/x86/include/asm/mmu_context.h | 2 + - arch/x86/include/asm/tlbflush.h | 37 +++++++ - arch/x86/mm/tlb.c | 154 ++++++++++++++++++++++++++++- - 4 files changed, 202 insertions(+), 3 deletions(-) - ---- a/arch/x86/include/asm/mmu.h -+++ b/arch/x86/include/asm/mmu.h -@@ -69,6 +69,18 @@ typedef struct { - u16 pkey_allocation_map; - s16 execute_only_pkey; - #endif -+ -+#ifdef CONFIG_BROADCAST_TLB_FLUSH -+ /* -+ * The global ASID will be a non-zero value when the process has -+ * the same ASID across all CPUs, allowing it to make use of -+ * hardware-assisted remote TLB invalidation like AMD INVLPGB. -+ */ -+ u16 global_asid; -+ -+ /* The process is transitioning to a new global ASID number. */ -+ bool asid_transition; -+#endif - } mm_context_t; - - #define INIT_MM_CONTEXT(mm) \ ---- a/arch/x86/include/asm/mmu_context.h -+++ b/arch/x86/include/asm/mmu_context.h -@@ -139,6 +139,8 @@ static inline void mm_reset_untag_mask(s - #define enter_lazy_tlb enter_lazy_tlb - extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); - -+extern void mm_free_global_asid(struct mm_struct *mm); -+ - /* - * Init a new mm. Used on mm copies, like at fork() - * and on mm's that are brand-new, like at execve(). ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -6,6 +6,7 @@ - #include - #include - -+#include - #include - #include - #include -@@ -234,6 +235,42 @@ void flush_tlb_one_kernel(unsigned long - void flush_tlb_multi(const struct cpumask *cpumask, - const struct flush_tlb_info *info); - -+static inline bool is_dyn_asid(u16 asid) -+{ -+ return asid < TLB_NR_DYN_ASIDS; -+} -+ -+#ifdef CONFIG_BROADCAST_TLB_FLUSH -+static inline u16 mm_global_asid(struct mm_struct *mm) -+{ -+ u16 asid; -+ -+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) -+ return 0; -+ -+ asid = smp_load_acquire(&mm->context.global_asid); -+ -+ /* mm->context.global_asid is either 0, or a global ASID */ -+ VM_WARN_ON_ONCE(asid && is_dyn_asid(asid)); -+ -+ return asid; -+} -+ -+static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) -+{ -+ /* -+ * Notably flush_tlb_mm_range() -> broadcast_tlb_flush() -> -+ * finish_asid_transition() needs to observe asid_transition = true -+ * once it observes global_asid. 
-+ */ -+ mm->context.asid_transition = true; -+ smp_store_release(&mm->context.global_asid, asid); -+} -+#else -+static inline u16 mm_global_asid(struct mm_struct *mm) { return 0; } -+static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) { } -+#endif /* CONFIG_BROADCAST_TLB_FLUSH */ -+ - #ifdef CONFIG_PARAVIRT - #include - #endif ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -74,13 +74,15 @@ - * use different names for each of them: - * - * ASID - [0, TLB_NR_DYN_ASIDS-1] -- * the canonical identifier for an mm -+ * the canonical identifier for an mm, dynamically allocated on each CPU -+ * [TLB_NR_DYN_ASIDS, MAX_ASID_AVAILABLE-1] -+ * the canonical, global identifier for an mm, identical across all CPUs - * -- * kPCID - [1, TLB_NR_DYN_ASIDS] -+ * kPCID - [1, MAX_ASID_AVAILABLE] - * the value we write into the PCID part of CR3; corresponds to the - * ASID+1, because PCID 0 is special. - * -- * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS] -+ * uPCID - [2048 + 1, 2048 + MAX_ASID_AVAILABLE] - * for KPTI each mm has two address spaces and thus needs two - * PCID values, but we can still do with a single ASID denomination - * for each mm. Corresponds to kPCID + 2048. -@@ -252,6 +254,152 @@ static void choose_new_asid(struct mm_st - } - - /* -+ * Global ASIDs are allocated for multi-threaded processes that are -+ * active on multiple CPUs simultaneously, giving each of those -+ * processes the same PCID on every CPU, for use with hardware-assisted -+ * TLB shootdown on remote CPUs, like AMD INVLPGB or Intel RAR. -+ * -+ * These global ASIDs are held for the lifetime of the process. -+ */ -+static DEFINE_RAW_SPINLOCK(global_asid_lock); -+static u16 last_global_asid = MAX_ASID_AVAILABLE; -+static DECLARE_BITMAP(global_asid_used, MAX_ASID_AVAILABLE); -+static DECLARE_BITMAP(global_asid_freed, MAX_ASID_AVAILABLE); -+static int global_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1; -+ -+/* -+ * When the search for a free ASID in the global ASID space reaches -+ * MAX_ASID_AVAILABLE, a global TLB flush guarantees that previously -+ * freed global ASIDs are safe to re-use. -+ * -+ * This way the global flush only needs to happen at ASID rollover -+ * time, and not at ASID allocation time. -+ */ -+static void reset_global_asid_space(void) -+{ -+ lockdep_assert_held(&global_asid_lock); -+ -+ invlpgb_flush_all_nonglobals(); -+ -+ /* -+ * The TLB flush above makes it safe to re-use the previously -+ * freed global ASIDs. -+ */ -+ bitmap_andnot(global_asid_used, global_asid_used, -+ global_asid_freed, MAX_ASID_AVAILABLE); -+ bitmap_clear(global_asid_freed, 0, MAX_ASID_AVAILABLE); -+ -+ /* Restart the search from the start of global ASID space. */ -+ last_global_asid = TLB_NR_DYN_ASIDS; -+} -+ -+static u16 allocate_global_asid(void) -+{ -+ u16 asid; -+ -+ lockdep_assert_held(&global_asid_lock); -+ -+ /* The previous allocation hit the edge of available address space */ -+ if (last_global_asid >= MAX_ASID_AVAILABLE - 1) -+ reset_global_asid_space(); -+ -+ asid = find_next_zero_bit(global_asid_used, MAX_ASID_AVAILABLE, last_global_asid); -+ -+ if (asid >= MAX_ASID_AVAILABLE && !global_asid_available) { -+ /* This should never happen. */ -+ VM_WARN_ONCE(1, "Unable to allocate global ASID despite %d available\n", -+ global_asid_available); -+ return 0; -+ } -+ -+ /* Claim this global ASID. 
*/ -+ __set_bit(asid, global_asid_used); -+ last_global_asid = asid; -+ global_asid_available--; -+ return asid; -+} -+ -+/* -+ * Check whether a process is currently active on more than @threshold CPUs. -+ * This is a cheap estimation on whether or not it may make sense to assign -+ * a global ASID to this process, and use broadcast TLB invalidation. -+ */ -+static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold) -+{ -+ int count = 0; -+ int cpu; -+ -+ /* This quick check should eliminate most single threaded programs. */ -+ if (cpumask_weight(mm_cpumask(mm)) <= threshold) -+ return false; -+ -+ /* Slower check to make sure. */ -+ for_each_cpu(cpu, mm_cpumask(mm)) { -+ /* Skip the CPUs that aren't really running this process. */ -+ if (per_cpu(cpu_tlbstate.loaded_mm, cpu) != mm) -+ continue; -+ -+ if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu)) -+ continue; -+ -+ if (++count > threshold) -+ return true; -+ } -+ return false; -+} -+ -+/* -+ * Assign a global ASID to the current process, protecting against -+ * races between multiple threads in the process. -+ */ -+static void use_global_asid(struct mm_struct *mm) -+{ -+ u16 asid; -+ -+ guard(raw_spinlock_irqsave)(&global_asid_lock); -+ -+ /* This process is already using broadcast TLB invalidation. */ -+ if (mm_global_asid(mm)) -+ return; -+ -+ /* -+ * The last global ASID was consumed while waiting for the lock. -+ * -+ * If this fires, a more aggressive ASID reuse scheme might be -+ * needed. -+ */ -+ if (!global_asid_available) { -+ VM_WARN_ONCE(1, "Ran out of global ASIDs\n"); -+ return; -+ } -+ -+ asid = allocate_global_asid(); -+ if (!asid) -+ return; -+ -+ mm_assign_global_asid(mm, asid); -+} -+ -+void mm_free_global_asid(struct mm_struct *mm) -+{ -+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) -+ return; -+ -+ if (!mm_global_asid(mm)) -+ return; -+ -+ guard(raw_spinlock_irqsave)(&global_asid_lock); -+ -+ /* The global ASID can be re-used only after flush at wrap-around. */ -+#ifdef CONFIG_BROADCAST_TLB_FLUSH -+ __set_bit(mm->context.global_asid, global_asid_freed); -+ -+ mm->context.global_asid = 0; -+ global_asid_available++; -+#endif -+} -+ -+/* - * Given an ASID, flush the corresponding user ASID. We can delay this - * until the next time we switch to it. - * diff --git a/debian/patches/patchset-pf/invlpgb/0008-x86-mm-Handle-global-ASID-context-switch-and-TLB-flu.patch b/debian/patches/patchset-pf/invlpgb/0008-x86-mm-Handle-global-ASID-context-switch-and-TLB-flu.patch deleted file mode 100644 index 6c0eebd..0000000 --- a/debian/patches/patchset-pf/invlpgb/0008-x86-mm-Handle-global-ASID-context-switch-and-TLB-flu.patch +++ /dev/null @@ -1,219 +0,0 @@ -From b56070b9f121507cabe352e03f0c534db2d5adc7 Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Tue, 25 Feb 2025 22:00:43 -0500 -Subject: x86/mm: Handle global ASID context switch and TLB flush - -Do context switch and TLB flush support for processes that use a global -ASID and PCID across all CPUs. - -At both context switch time and TLB flush time, it needs to be checked whether -a task is switching to a global ASID, and, if so, reload the TLB with the new -ASID as appropriate. - -In both code paths, the TLB flush is avoided if a global ASID is used, because -the global ASIDs are always kept up to date across CPUs, even when the -process is not running on a CPU. 
- - [ bp: - - Massage - - :%s/\/cpu_feature_enabled/cgi - ] - -Signed-off-by: Rik van Riel -Signed-off-by: Borislav Petkov (AMD) -Link: https://lore.kernel.org/r/20250226030129.530345-9-riel@surriel.com ---- - arch/x86/include/asm/tlbflush.h | 14 ++++++ - arch/x86/mm/tlb.c | 77 ++++++++++++++++++++++++++++++--- - 2 files changed, 84 insertions(+), 7 deletions(-) - ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -240,6 +240,11 @@ static inline bool is_dyn_asid(u16 asid) - return asid < TLB_NR_DYN_ASIDS; - } - -+static inline bool is_global_asid(u16 asid) -+{ -+ return !is_dyn_asid(asid); -+} -+ - #ifdef CONFIG_BROADCAST_TLB_FLUSH - static inline u16 mm_global_asid(struct mm_struct *mm) - { -@@ -266,9 +271,18 @@ static inline void mm_assign_global_asid - mm->context.asid_transition = true; - smp_store_release(&mm->context.global_asid, asid); - } -+ -+static inline bool mm_in_asid_transition(struct mm_struct *mm) -+{ -+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) -+ return false; -+ -+ return mm && READ_ONCE(mm->context.asid_transition); -+} - #else - static inline u16 mm_global_asid(struct mm_struct *mm) { return 0; } - static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) { } -+static inline bool mm_in_asid_transition(struct mm_struct *mm) { return false; } - #endif /* CONFIG_BROADCAST_TLB_FLUSH */ - - #ifdef CONFIG_PARAVIRT ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -227,6 +227,20 @@ static void choose_new_asid(struct mm_st - return; - } - -+ /* -+ * TLB consistency for global ASIDs is maintained with hardware assisted -+ * remote TLB flushing. Global ASIDs are always up to date. -+ */ -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { -+ u16 global_asid = mm_global_asid(next); -+ -+ if (global_asid) { -+ *new_asid = global_asid; -+ *need_flush = false; -+ return; -+ } -+ } -+ - if (this_cpu_read(cpu_tlbstate.invalidate_other)) - clear_asid_other(); - -@@ -400,6 +414,23 @@ void mm_free_global_asid(struct mm_struc - } - - /* -+ * Is the mm transitioning from a CPU-local ASID to a global ASID? -+ */ -+static bool mm_needs_global_asid(struct mm_struct *mm, u16 asid) -+{ -+ u16 global_asid = mm_global_asid(mm); -+ -+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) -+ return false; -+ -+ /* Process is transitioning to a global ASID */ -+ if (global_asid && asid != global_asid) -+ return true; -+ -+ return false; -+} -+ -+/* - * Given an ASID, flush the corresponding user ASID. We can delay this - * until the next time we switch to it. - * -@@ -704,7 +735,8 @@ void switch_mm_irqs_off(struct mm_struct - */ - if (prev == next) { - /* Not actually switching mm's */ -- VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != -+ VM_WARN_ON(is_dyn_asid(prev_asid) && -+ this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != - next->context.ctx_id); - - /* -@@ -721,6 +753,20 @@ void switch_mm_irqs_off(struct mm_struct - !cpumask_test_cpu(cpu, mm_cpumask(next)))) - cpumask_set_cpu(cpu, mm_cpumask(next)); - -+ /* Check if the current mm is transitioning to a global ASID */ -+ if (mm_needs_global_asid(next, prev_asid)) { -+ next_tlb_gen = atomic64_read(&next->context.tlb_gen); -+ choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); -+ goto reload_tlb; -+ } -+ -+ /* -+ * Broadcast TLB invalidation keeps this ASID up to date -+ * all the time. 
-+ */ -+ if (is_global_asid(prev_asid)) -+ return; -+ - /* - * If the CPU is not in lazy TLB mode, we are just switching - * from one thread in a process to another thread in the same -@@ -755,6 +801,13 @@ void switch_mm_irqs_off(struct mm_struct - cond_mitigation(tsk); - - /* -+ * Let nmi_uaccess_okay() and finish_asid_transition() -+ * know that CR3 is changing. -+ */ -+ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); -+ barrier(); -+ -+ /* - * Leave this CPU in prev's mm_cpumask. Atomic writes to - * mm_cpumask can be expensive under contention. The CPU - * will be removed lazily at TLB flush time. -@@ -768,18 +821,12 @@ void switch_mm_irqs_off(struct mm_struct - next_tlb_gen = atomic64_read(&next->context.tlb_gen); - - choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); -- -- /* -- * Indicate that CR3 is about to change. nmi_uaccess_okay() -- * and others are sensitive to the window where mm_cpumask(), -- * CR3 and cpu_tlbstate.loaded_mm are not all in sync. -- */ -- this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); -- barrier(); - } - -+reload_tlb: - new_lam = mm_lam_cr3_mask(next); - if (need_flush) { -+ VM_WARN_ON_ONCE(is_global_asid(new_asid)); - this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); - this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); - load_new_mm_cr3(next->pgd, new_asid, new_lam, true); -@@ -898,7 +945,7 @@ static void flush_tlb_func(void *info) - const struct flush_tlb_info *f = info; - struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); - u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); -- u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen); -+ u64 local_tlb_gen; - bool local = smp_processor_id() == f->initiating_cpu; - unsigned long nr_invalidate = 0; - u64 mm_tlb_gen; -@@ -921,6 +968,16 @@ static void flush_tlb_func(void *info) - if (unlikely(loaded_mm == &init_mm)) - return; - -+ /* Reload the ASID if transitioning into or out of a global ASID */ -+ if (mm_needs_global_asid(loaded_mm, loaded_mm_asid)) { -+ switch_mm_irqs_off(NULL, loaded_mm, NULL); -+ loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); -+ } -+ -+ /* Broadcast ASIDs are always kept up to date with INVLPGB. */ -+ if (is_global_asid(loaded_mm_asid)) -+ return; -+ - VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) != - loaded_mm->context.ctx_id); - -@@ -938,6 +995,8 @@ static void flush_tlb_func(void *info) - return; - } - -+ local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen); -+ - if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID && - f->new_tlb_gen <= local_tlb_gen)) { - /* -@@ -1120,7 +1179,7 @@ STATIC_NOPV void native_flush_tlb_multi( - * up on the new contents of what used to be page tables, while - * doing a speculative memory access. 
- */ -- if (info->freed_tables) -+ if (info->freed_tables || mm_in_asid_transition(info->mm)) - on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); - else - on_each_cpu_cond_mask(should_flush_tlb, flush_tlb_func, diff --git a/debian/patches/patchset-pf/invlpgb/0009-x86-mm-Add-global-ASID-process-exit-helpers.patch b/debian/patches/patchset-pf/invlpgb/0009-x86-mm-Add-global-ASID-process-exit-helpers.patch deleted file mode 100644 index 47228b9..0000000 --- a/debian/patches/patchset-pf/invlpgb/0009-x86-mm-Add-global-ASID-process-exit-helpers.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 6d3b8545e2c3c638363fb449a99b5a6cbab87a49 Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Tue, 25 Feb 2025 22:00:44 -0500 -Subject: x86/mm: Add global ASID process exit helpers - -A global ASID is allocated for the lifetime of a process. Free the global ASID -at process exit time. - - [ bp: Massage, create helpers, hide details inside them. ] - -Signed-off-by: Rik van Riel -Signed-off-by: Borislav Petkov (AMD) -Link: https://lore.kernel.org/r/20250226030129.530345-10-riel@surriel.com ---- - arch/x86/include/asm/mmu_context.h | 8 +++++++- - arch/x86/include/asm/tlbflush.h | 9 +++++++++ - 2 files changed, 16 insertions(+), 1 deletion(-) - ---- a/arch/x86/include/asm/mmu_context.h -+++ b/arch/x86/include/asm/mmu_context.h -@@ -2,7 +2,6 @@ - #ifndef _ASM_X86_MMU_CONTEXT_H - #define _ASM_X86_MMU_CONTEXT_H - --#include - #include - #include - #include -@@ -13,6 +12,7 @@ - #include - #include - #include -+#include - - extern atomic64_t last_mm_ctx_id; - -@@ -139,6 +139,9 @@ static inline void mm_reset_untag_mask(s - #define enter_lazy_tlb enter_lazy_tlb - extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); - -+#define mm_init_global_asid mm_init_global_asid -+extern void mm_init_global_asid(struct mm_struct *mm); -+ - extern void mm_free_global_asid(struct mm_struct *mm); - - /* -@@ -163,6 +166,8 @@ static inline int init_new_context(struc - mm->context.execute_only_pkey = -1; - } - #endif -+ -+ mm_init_global_asid(mm); - mm_reset_untag_mask(mm); - init_new_context_ldt(mm); - return 0; -@@ -172,6 +177,7 @@ static inline int init_new_context(struc - static inline void destroy_context(struct mm_struct *mm) - { - destroy_context_ldt(mm); -+ mm_free_global_asid(mm); - } - - extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -261,6 +261,14 @@ static inline u16 mm_global_asid(struct - return asid; - } - -+static inline void mm_init_global_asid(struct mm_struct *mm) -+{ -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { -+ mm->context.global_asid = 0; -+ mm->context.asid_transition = false; -+ } -+} -+ - static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) - { - /* -@@ -281,6 +289,7 @@ static inline bool mm_in_asid_transition - } - #else - static inline u16 mm_global_asid(struct mm_struct *mm) { return 0; } -+static inline void mm_init_global_asid(struct mm_struct *mm) { } - static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) { } - static inline bool mm_in_asid_transition(struct mm_struct *mm) { return false; } - #endif /* CONFIG_BROADCAST_TLB_FLUSH */ diff --git a/debian/patches/patchset-pf/invlpgb/0010-x86-mm-Enable-broadcast-TLB-invalidation-for-multi-t.patch b/debian/patches/patchset-pf/invlpgb/0010-x86-mm-Enable-broadcast-TLB-invalidation-for-multi-t.patch deleted file mode 100644 index cf2505d..0000000 --- 
a/debian/patches/patchset-pf/invlpgb/0010-x86-mm-Enable-broadcast-TLB-invalidation-for-multi-t.patch +++ /dev/null @@ -1,219 +0,0 @@ -From 077e9ceb65f514ea63afc65cce86ce8677e77012 Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Tue, 25 Feb 2025 22:00:45 -0500 -Subject: x86/mm: Enable broadcast TLB invalidation for multi-threaded - processes - -There is not enough room in the 12-bit ASID address space to hand out -broadcast ASIDs to every process. Only hand out broadcast ASIDs to processes -when they are observed to be simultaneously running on 4 or more CPUs. - -This also allows single threaded process to continue using the cheaper, local -TLB invalidation instructions like INVLPGB. - -Due to the structure of flush_tlb_mm_range(), the INVLPGB flushing is done in -a generically named broadcast_tlb_flush() function which can later also be -used for Intel RAR. - -Combined with the removal of unnecessary lru_add_drain calls() (see -https://lore.kernel.org/r/20241219153253.3da9e8aa@fangorn) this results in -a nice performance boost for the will-it-scale tlb_flush2_threads test on an -AMD Milan system with 36 cores: - - - vanilla kernel: 527k loops/second - - lru_add_drain removal: 731k loops/second - - only INVLPGB: 527k loops/second - - lru_add_drain + INVLPGB: 1157k loops/second - -Profiling with only the INVLPGB changes showed while TLB invalidation went -down from 40% of the total CPU time to only around 4% of CPU time, the -contention simply moved to the LRU lock. - -Fixing both at the same time about doubles the number of iterations per second -from this case. - -Comparing will-it-scale tlb_flush2_threads with several different numbers of -threads on a 72 CPU AMD Milan shows similar results. The number represents the -total number of loops per second across all the threads: - - threads tip INVLPGB - - 1 315k 304k - 2 423k 424k - 4 644k 1032k - 8 652k 1267k - 16 737k 1368k - 32 759k 1199k - 64 636k 1094k - 72 609k 993k - -1 and 2 thread performance is similar with and without INVLPGB, because -INVLPGB is only used on processes using 4 or more CPUs simultaneously. - -The number is the median across 5 runs. 
- -Some numbers closer to real world performance can be found at Phoronix, thanks -to Michael: - -https://www.phoronix.com/news/AMD-INVLPGB-Linux-Benefits - - [ bp: - - Massage - - :%s/\/cpu_feature_enabled/cgi - - :%s/\/mm_clear_asid_transition/cgi - - Fold in a 0day bot fix: https://lore.kernel.org/oe-kbuild-all/202503040000.GtiWUsBm-lkp@intel.com - ] - -Signed-off-by: Rik van Riel -Signed-off-by: Borislav Petkov (AMD) -Reviewed-by: Nadav Amit -Link: https://lore.kernel.org/r/20250226030129.530345-11-riel@surriel.com ---- - arch/x86/include/asm/tlbflush.h | 6 ++ - arch/x86/mm/tlb.c | 104 +++++++++++++++++++++++++++++++- - 2 files changed, 109 insertions(+), 1 deletion(-) - ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -280,6 +280,11 @@ static inline void mm_assign_global_asid - smp_store_release(&mm->context.global_asid, asid); - } - -+static inline void mm_clear_asid_transition(struct mm_struct *mm) -+{ -+ WRITE_ONCE(mm->context.asid_transition, false); -+} -+ - static inline bool mm_in_asid_transition(struct mm_struct *mm) - { - if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) -@@ -291,6 +296,7 @@ static inline bool mm_in_asid_transition - static inline u16 mm_global_asid(struct mm_struct *mm) { return 0; } - static inline void mm_init_global_asid(struct mm_struct *mm) { } - static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) { } -+static inline void mm_clear_asid_transition(struct mm_struct *mm) { } - static inline bool mm_in_asid_transition(struct mm_struct *mm) { return false; } - #endif /* CONFIG_BROADCAST_TLB_FLUSH */ - ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -431,6 +431,105 @@ static bool mm_needs_global_asid(struct - } - - /* -+ * x86 has 4k ASIDs (2k when compiled with KPTI), but the largest x86 -+ * systems have over 8k CPUs. Because of this potential ASID shortage, -+ * global ASIDs are handed out to processes that have frequent TLB -+ * flushes and are active on 4 or more CPUs simultaneously. -+ */ -+static void consider_global_asid(struct mm_struct *mm) -+{ -+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) -+ return; -+ -+ /* Check every once in a while. */ -+ if ((current->pid & 0x1f) != (jiffies & 0x1f)) -+ return; -+ -+ /* -+ * Assign a global ASID if the process is active on -+ * 4 or more CPUs simultaneously. -+ */ -+ if (mm_active_cpus_exceeds(mm, 3)) -+ use_global_asid(mm); -+} -+ -+static void finish_asid_transition(struct flush_tlb_info *info) -+{ -+ struct mm_struct *mm = info->mm; -+ int bc_asid = mm_global_asid(mm); -+ int cpu; -+ -+ if (!mm_in_asid_transition(mm)) -+ return; -+ -+ for_each_cpu(cpu, mm_cpumask(mm)) { -+ /* -+ * The remote CPU is context switching. Wait for that to -+ * finish, to catch the unlikely case of it switching to -+ * the target mm with an out of date ASID. -+ */ -+ while (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) == LOADED_MM_SWITCHING) -+ cpu_relax(); -+ -+ if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) != mm) -+ continue; -+ -+ /* -+ * If at least one CPU is not using the global ASID yet, -+ * send a TLB flush IPI. The IPI should cause stragglers -+ * to transition soon. -+ * -+ * This can race with the CPU switching to another task; -+ * that results in a (harmless) extra IPI. -+ */ -+ if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm_asid, cpu)) != bc_asid) { -+ flush_tlb_multi(mm_cpumask(info->mm), info); -+ return; -+ } -+ } -+ -+ /* All the CPUs running this process are using the global ASID. 
*/ -+ mm_clear_asid_transition(mm); -+} -+ -+static void broadcast_tlb_flush(struct flush_tlb_info *info) -+{ -+ bool pmd = info->stride_shift == PMD_SHIFT; -+ unsigned long asid = mm_global_asid(info->mm); -+ unsigned long addr = info->start; -+ -+ /* -+ * TLB flushes with INVLPGB are kicked off asynchronously. -+ * The inc_mm_tlb_gen() guarantees page table updates are done -+ * before these TLB flushes happen. -+ */ -+ if (info->end == TLB_FLUSH_ALL) { -+ invlpgb_flush_single_pcid_nosync(kern_pcid(asid)); -+ /* Do any CPUs supporting INVLPGB need PTI? */ -+ if (cpu_feature_enabled(X86_FEATURE_PTI)) -+ invlpgb_flush_single_pcid_nosync(user_pcid(asid)); -+ } else do { -+ unsigned long nr = 1; -+ -+ if (info->stride_shift <= PMD_SHIFT) { -+ nr = (info->end - addr) >> info->stride_shift; -+ nr = clamp_val(nr, 1, invlpgb_count_max); -+ } -+ -+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), addr, nr, pmd); -+ if (cpu_feature_enabled(X86_FEATURE_PTI)) -+ invlpgb_flush_user_nr_nosync(user_pcid(asid), addr, nr, pmd); -+ -+ addr += nr << info->stride_shift; -+ } while (addr < info->end); -+ -+ finish_asid_transition(info); -+ -+ /* Wait for the INVLPGBs kicked off above to finish. */ -+ __tlbsync(); -+} -+ -+/* - * Given an ASID, flush the corresponding user ASID. We can delay this - * until the next time we switch to it. - * -@@ -1275,9 +1374,12 @@ void flush_tlb_mm_range(struct mm_struct - * a local TLB flush is needed. Optimize this use-case by calling - * flush_tlb_func_local() directly in this case. - */ -- if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { -+ if (mm_global_asid(mm)) { -+ broadcast_tlb_flush(info); -+ } else if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { - info->trim_cpumask = should_trim_cpumask(mm); - flush_tlb_multi(mm_cpumask(mm), info); -+ consider_global_asid(mm); - } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { - lockdep_assert_irqs_enabled(); - local_irq_disable(); diff --git a/debian/patches/patchset-pf/invlpgb/0011-x86-mm-Enable-AMD-translation-cache-extensions.patch b/debian/patches/patchset-pf/invlpgb/0011-x86-mm-Enable-AMD-translation-cache-extensions.patch deleted file mode 100644 index 004d0c5..0000000 --- a/debian/patches/patchset-pf/invlpgb/0011-x86-mm-Enable-AMD-translation-cache-extensions.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 1994cff363a37aff5b1232ca9f757b02ae244956 Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Tue, 25 Feb 2025 22:00:47 -0500 -Subject: x86/mm: Enable AMD translation cache extensions - -With AMD TCE (translation cache extensions) only the intermediate mappings -that cover the address range zapped by INVLPG / INVLPGB get invalidated, -rather than all intermediate mappings getting zapped at every TLB invalidation. - -This can help reduce the TLB miss rate, by keeping more intermediate mappings -in the cache. - -From the AMD manual: - -Translation Cache Extension (TCE) Bit. Bit 15, read/write. Setting this bit to -1 changes how the INVLPG, INVLPGB, and INVPCID instructions operate on TLB -entries. When this bit is 0, these instructions remove the target PTE from the -TLB as well as all upper-level table entries that are cached in the TLB, -whether or not they are associated with the target PTE. When this bit is set, -these instructions will remove the target PTE and only those upper-level -entries that lead to the target PTE in the page table hierarchy, leaving -unrelated upper-level entries intact. - - [ bp: use cpu_has()... I know, it is a mess. 
] - -Signed-off-by: Rik van Riel -Signed-off-by: Borislav Petkov (AMD) -Link: https://lore.kernel.org/r/20250226030129.530345-13-riel@surriel.com ---- - arch/x86/include/asm/msr-index.h | 2 ++ - arch/x86/kernel/cpu/amd.c | 4 ++++ - tools/arch/x86/include/asm/msr-index.h | 2 ++ - 3 files changed, 8 insertions(+) - ---- a/arch/x86/include/asm/msr-index.h -+++ b/arch/x86/include/asm/msr-index.h -@@ -25,6 +25,7 @@ - #define _EFER_SVME 12 /* Enable virtualization */ - #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ - #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ -+#define _EFER_TCE 15 /* Enable Translation Cache Extensions */ - #define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ - - #define EFER_SCE (1<<_EFER_SCE) -@@ -34,6 +35,7 @@ - #define EFER_SVME (1<<_EFER_SVME) - #define EFER_LMSLE (1<<_EFER_LMSLE) - #define EFER_FFXSR (1<<_EFER_FFXSR) -+#define EFER_TCE (1<<_EFER_TCE) - #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) - - /* ---- a/arch/x86/kernel/cpu/amd.c -+++ b/arch/x86/kernel/cpu/amd.c -@@ -1081,6 +1081,10 @@ static void init_amd(struct cpuinfo_x86 - - /* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */ - clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE); -+ -+ /* Enable Translation Cache Extension */ -+ if (cpu_has(c, X86_FEATURE_TCE)) -+ msr_set_bit(MSR_EFER, _EFER_TCE); - } - - #ifdef CONFIG_X86_32 ---- a/tools/arch/x86/include/asm/msr-index.h -+++ b/tools/arch/x86/include/asm/msr-index.h -@@ -25,6 +25,7 @@ - #define _EFER_SVME 12 /* Enable virtualization */ - #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ - #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ -+#define _EFER_TCE 15 /* Enable Translation Cache Extensions */ - #define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ - - #define EFER_SCE (1<<_EFER_SCE) -@@ -34,6 +35,7 @@ - #define EFER_SVME (1<<_EFER_SVME) - #define EFER_LMSLE (1<<_EFER_LMSLE) - #define EFER_FFXSR (1<<_EFER_FFXSR) -+#define EFER_TCE (1<<_EFER_TCE) - #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) - - /* diff --git a/debian/patches/patchset-pf/invlpgb/0012-x86-mm-Always-set-the-ASID-valid-bit-for-the-INVLPGB.patch b/debian/patches/patchset-pf/invlpgb/0012-x86-mm-Always-set-the-ASID-valid-bit-for-the-INVLPGB.patch deleted file mode 100644 index 5ff3979..0000000 --- a/debian/patches/patchset-pf/invlpgb/0012-x86-mm-Always-set-the-ASID-valid-bit-for-the-INVLPGB.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 5932a2c8122050c4a2f71588778feb0677fe32b4 Mon Sep 17 00:00:00 2001 -From: Tom Lendacky -Date: Tue, 4 Mar 2025 12:59:56 +0100 -Subject: x86/mm: Always set the ASID valid bit for the INVLPGB instruction - -When executing the INVLPGB instruction on a bare-metal host or hypervisor, if -the ASID valid bit is not set, the instruction will flush the TLB entries that -match the specified criteria for any ASID, not just the those of the host. If -virtual machines are running on the system, this may result in inadvertent -flushes of guest TLB entries. - -When executing the INVLPGB instruction in a guest and the INVLPGB instruction is -not intercepted by the hypervisor, the hardware will replace the requested ASID -with the guest ASID and set the ASID valid bit before doing the broadcast -invalidation. Thus a guest is only able to flush its own TLB entries. - -So to limit the host TLB flushing reach, always set the ASID valid bit using an -ASID value of 0 (which represents the host/hypervisor). This will will result in -the desired effect in both host and guest. 
- -Signed-off-by: Tom Lendacky -Signed-off-by: Borislav Petkov (AMD) -Link: https://lore.kernel.org/r/20250304120449.GHZ8bsYYyEBOKQIxBm@fat_crate.local ---- - arch/x86/include/asm/tlb.h | 58 +++++++++++++++++++++----------------- - 1 file changed, 32 insertions(+), 26 deletions(-) - ---- a/arch/x86/include/asm/tlb.h -+++ b/arch/x86/include/asm/tlb.h -@@ -33,6 +33,27 @@ enum addr_stride { - PMD_STRIDE = 1 - }; - -+/* -+ * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination -+ * of the three. For example: -+ * - FLAG_VA | FLAG_INCLUDE_GLOBAL: invalidate all TLB entries at the address -+ * - FLAG_PCID: invalidate all TLB entries matching the PCID -+ * -+ * The first is used to invalidate (kernel) mappings at a particular -+ * address across all processes. -+ * -+ * The latter invalidates all TLB entries matching a PCID. -+ */ -+#define INVLPGB_FLAG_VA BIT(0) -+#define INVLPGB_FLAG_PCID BIT(1) -+#define INVLPGB_FLAG_ASID BIT(2) -+#define INVLPGB_FLAG_INCLUDE_GLOBAL BIT(3) -+#define INVLPGB_FLAG_FINAL_ONLY BIT(4) -+#define INVLPGB_FLAG_INCLUDE_NESTED BIT(5) -+ -+/* The implied mode when all bits are clear: */ -+#define INVLPGB_MODE_ALL_NONGLOBALS 0UL -+ - #ifdef CONFIG_BROADCAST_TLB_FLUSH - /* - * INVLPGB does broadcast TLB invalidation across all the CPUs in the system. -@@ -40,14 +61,20 @@ enum addr_stride { - * The INVLPGB instruction is weakly ordered, and a batch of invalidations can - * be done in a parallel fashion. - * -- * The instruction takes the number of extra pages to invalidate, beyond -- * the first page, while __invlpgb gets the more human readable number of -- * pages to invalidate. -+ * The instruction takes the number of extra pages to invalidate, beyond the -+ * first page, while __invlpgb gets the more human readable number of pages to -+ * invalidate. - * - * The bits in rax[0:2] determine respectively which components of the address - * (VA, PCID, ASID) get compared when flushing. If neither bits are set, *any* - * address in the specified range matches. - * -+ * Since it is desired to only flush TLB entries for the ASID that is executing -+ * the instruction (a host/hypervisor or a guest), the ASID valid bit should -+ * always be set. On a host/hypervisor, the hardware will use the ASID value -+ * specified in EDX[15:0] (which should be 0). On a guest, the hardware will -+ * use the actual ASID value of the guest. -+ * - * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from - * this CPU have completed. - */ -@@ -55,9 +82,9 @@ static inline void __invlpgb(unsigned lo - unsigned long addr, u16 nr_pages, - enum addr_stride stride, u8 flags) - { -- u32 edx = (pcid << 16) | asid; -+ u64 rax = addr | flags | INVLPGB_FLAG_ASID; - u32 ecx = (stride << 31) | (nr_pages - 1); -- u64 rax = addr | flags; -+ u32 edx = (pcid << 16) | asid; - - /* The low bits in rax are for flags. Verify addr is clean. */ - VM_WARN_ON_ONCE(addr & ~PAGE_MASK); -@@ -93,27 +120,6 @@ static inline void __invlpgb_all(unsigne - static inline void __tlbsync(void) { } - #endif - --/* -- * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination -- * of the three. For example: -- * - FLAG_VA | FLAG_INCLUDE_GLOBAL: invalidate all TLB entries at the address -- * - FLAG_PCID: invalidate all TLB entries matching the PCID -- * -- * The first is used to invalidate (kernel) mappings at a particular -- * address across all processes. -- * -- * The latter invalidates all TLB entries matching a PCID. 
-- */ --#define INVLPGB_FLAG_VA BIT(0) --#define INVLPGB_FLAG_PCID BIT(1) --#define INVLPGB_FLAG_ASID BIT(2) --#define INVLPGB_FLAG_INCLUDE_GLOBAL BIT(3) --#define INVLPGB_FLAG_FINAL_ONLY BIT(4) --#define INVLPGB_FLAG_INCLUDE_NESTED BIT(5) -- --/* The implied mode when all bits are clear: */ --#define INVLPGB_MODE_ALL_NONGLOBALS 0UL -- - static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid, - unsigned long addr, - u16 nr, bool stride) diff --git a/debian/patches/patchset-pf/invlpgb/0013-x86-mm-Only-do-broadcast-flush-from-reclaim-if-pages.patch b/debian/patches/patchset-pf/invlpgb/0013-x86-mm-Only-do-broadcast-flush-from-reclaim-if-pages.patch deleted file mode 100644 index 1492c8d..0000000 --- a/debian/patches/patchset-pf/invlpgb/0013-x86-mm-Only-do-broadcast-flush-from-reclaim-if-pages.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 0e0a5ca37a8e3b06f450f4093ba1b6d6f33c2161 Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Wed, 19 Mar 2025 13:25:20 -0400 -Subject: x86/mm: Only do broadcast flush from reclaim if pages were unmapped - -Track whether pages were unmapped from any MM (even ones with a currently -empty mm_cpumask) by the reclaim code, to figure out whether or not -broadcast TLB flush should be done when reclaim finishes. - -The reason any MM must be tracked, and not only ones contributing to the -tlbbatch cpumask, is that broadcast ASIDs are expected to be kept up to -date even on CPUs where the MM is not currently active. - -This change allows reclaim to avoid doing TLB flushes when only clean page -cache pages and/or slab memory were reclaimed, which is fairly common. - -( This is a simpler alternative to the code that was in my INVLPGB series - before, and it seems to capture most of the benefit due to how common - it is to reclaim only page cache. ) - -Signed-off-by: Rik van Riel -Signed-off-by: Ingo Molnar -Cc: Dave Hansen -Cc: Andy Lutomirski -Cc: Peter Zijlstra -Cc: Linus Torvalds -Link: https://lore.kernel.org/r/20250319132520.6b10ad90@fangorn ---- - arch/x86/include/asm/tlbbatch.h | 5 +++++ - arch/x86/include/asm/tlbflush.h | 1 + - arch/x86/mm/tlb.c | 3 ++- - 3 files changed, 8 insertions(+), 1 deletion(-) - ---- a/arch/x86/include/asm/tlbbatch.h -+++ b/arch/x86/include/asm/tlbbatch.h -@@ -10,6 +10,11 @@ struct arch_tlbflush_unmap_batch { - * the PFNs being flushed.. - */ - struct cpumask cpumask; -+ /* -+ * Set if pages were unmapped from any MM, even one that does not -+ * have active CPUs in its cpumask. -+ */ -+ bool unmapped_pages; - }; - - #endif /* _ARCH_X86_TLBBATCH_H */ ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -353,6 +353,7 @@ static inline void arch_tlbbatch_add_pen - { - inc_mm_tlb_gen(mm); - cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); -+ batch->unmapped_pages = true; - mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); - } - ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -1648,8 +1648,9 @@ void arch_tlbbatch_flush(struct arch_tlb - * a local TLB flush is needed. Optimize this use-case by calling - * flush_tlb_func_local() directly in this case. 
- */ -- if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB) && batch->unmapped_pages) { - invlpgb_flush_all_nonglobals(); -+ batch->unmapped_pages = false; - } else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) { - flush_tlb_multi(&batch->cpumask, info); - } else if (cpumask_test_cpu(cpu, &batch->cpumask)) { diff --git a/debian/patches/patchset-pf/invlpgb/0014-x86-mm-Eliminate-window-where-TLB-flushes-may-be-ina.patch b/debian/patches/patchset-pf/invlpgb/0014-x86-mm-Eliminate-window-where-TLB-flushes-may-be-ina.patch deleted file mode 100644 index 894e68f..0000000 --- a/debian/patches/patchset-pf/invlpgb/0014-x86-mm-Eliminate-window-where-TLB-flushes-may-be-ina.patch +++ /dev/null @@ -1,92 +0,0 @@ -From 6ae491224973eb4013ee67a8c05c420f057d5fee Mon Sep 17 00:00:00 2001 -From: Dave Hansen -Date: Thu, 8 May 2025 15:41:32 -0700 -Subject: x86/mm: Eliminate window where TLB flushes may be inadvertently - skipped - -tl;dr: There is a window in the mm switching code where the new CR3 is -set and the CPU should be getting TLB flushes for the new mm. But -should_flush_tlb() has a bug and suppresses the flush. Fix it by -widening the window where should_flush_tlb() sends an IPI. - -Long Version: - -=== History === - -There were a few things leading up to this. - -First, updating mm_cpumask() was observed to be too expensive, so it was -made lazier. But being lazy caused too many unnecessary IPIs to CPUs -due to the now-lazy mm_cpumask(). So code was added to cull -mm_cpumask() periodically[2]. But that culling was a bit too aggressive -and skipped sending TLB flushes to CPUs that need them. So here we are -again. - -=== Problem === - -The too-aggressive code in should_flush_tlb() strikes in this window: - - // Turn on IPIs for this CPU/mm combination, but only - // if should_flush_tlb() agrees: - cpumask_set_cpu(cpu, mm_cpumask(next)); - - next_tlb_gen = atomic64_read(&next->context.tlb_gen); - choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); - load_new_mm_cr3(need_flush); - // ^ After 'need_flush' is set to false, IPIs *MUST* - // be sent to this CPU and not be ignored. - - this_cpu_write(cpu_tlbstate.loaded_mm, next); - // ^ Not until this point does should_flush_tlb() - // become true! - -should_flush_tlb() will suppress TLB flushes between load_new_mm_cr3() -and writing to 'loaded_mm', which is a window where they should not be -suppressed. Whoops. - -=== Solution === - -Thankfully, the fuzzy "just about to write CR3" window is already marked -with loaded_mm==LOADED_MM_SWITCHING. Simply checking for that state in -should_flush_tlb() is sufficient to ensure that the CPU is targeted with -an IPI. - -This will cause more TLB flush IPIs. But the window is relatively small -and I do not expect this to cause any kind of measurable performance -impact. - -Update the comment where LOADED_MM_SWITCHING is written since it grew -yet another user. - -Peter Z also raised a concern that should_flush_tlb() might not observe -'loaded_mm' and 'is_lazy' in the same order that switch_mm_irqs_off() -writes them. Add a barrier to ensure that they are observed in the -order they are written. 
- -Signed-off-by: Dave Hansen -Acked-by: Rik van Riel -Link: https://lore.kernel.org/oe-lkp/202411282207.6bd28eae-lkp@intel.com/ [1] -Fixes: 6db2526c1d69 ("x86/mm/tlb: Only trim the mm_cpumask once a second") [2] -Reported-by: Stephen Dolan -Cc: stable@vger.kernel.org -Acked-by: Ingo Molnar -Acked-by: Peter Zijlstra (Intel) -Signed-off-by: Linus Torvalds ---- - arch/x86/mm/tlb.c | 22 +++++++++++++++++++--- - 1 file changed, 19 insertions(+), 3 deletions(-) - ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -900,8 +900,9 @@ void switch_mm_irqs_off(struct mm_struct - cond_mitigation(tsk); - - /* -- * Let nmi_uaccess_okay() and finish_asid_transition() -- * know that CR3 is changing. -+ * Indicate that CR3 is about to change. nmi_uaccess_okay() -+ * and others are sensitive to the window where mm_cpumask(), -+ * CR3 and cpu_tlbstate.loaded_mm are not all in sync. - */ - this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); - barrier(); diff --git a/debian/patches/patchset-pf/fixes/0003-ice-mark-ice_write_prof_mask_reg-as-noinline.patch b/debian/patches/patchset-pf/kbuild/0001-ice-mark-ice_write_prof_mask_reg-as-noinline.patch similarity index 95% rename from debian/patches/patchset-pf/fixes/0003-ice-mark-ice_write_prof_mask_reg-as-noinline.patch rename to debian/patches/patchset-pf/kbuild/0001-ice-mark-ice_write_prof_mask_reg-as-noinline.patch index ef5b39f..596976c 100644 --- a/debian/patches/patchset-pf/fixes/0003-ice-mark-ice_write_prof_mask_reg-as-noinline.patch +++ b/debian/patches/patchset-pf/kbuild/0001-ice-mark-ice_write_prof_mask_reg-as-noinline.patch @@ -1,4 +1,4 @@ -From e3d18eed972374cfbac1e58cf109209b07c1e27e Mon Sep 17 00:00:00 2001 +From 3400d11fad849dae6015e448c83d6e90f8a6ef35 Mon Sep 17 00:00:00 2001 From: Oleksandr Natalenko Date: Tue, 8 Apr 2025 12:02:36 +0200 Subject: ice: mark ice_write_prof_mask_reg() as noinline diff --git a/debian/patches/patchset-pf/fixes/0006-wifi-mac80211-mark-copy_mesh_setup-as-noinline.patch b/debian/patches/patchset-pf/kbuild/0002-wifi-mac80211-mark-copy_mesh_setup-as-noinline.patch similarity index 95% rename from debian/patches/patchset-pf/fixes/0006-wifi-mac80211-mark-copy_mesh_setup-as-noinline.patch rename to debian/patches/patchset-pf/kbuild/0002-wifi-mac80211-mark-copy_mesh_setup-as-noinline.patch index 7877332..e4f798d 100644 --- a/debian/patches/patchset-pf/fixes/0006-wifi-mac80211-mark-copy_mesh_setup-as-noinline.patch +++ b/debian/patches/patchset-pf/kbuild/0002-wifi-mac80211-mark-copy_mesh_setup-as-noinline.patch @@ -1,4 +1,4 @@ -From f762c206076d274ecb0e2f3d9b6cbca361ebb246 Mon Sep 17 00:00:00 2001 +From 1615cc0c7d979a1c211f349c8c28ee8afb9ad57d Mon Sep 17 00:00:00 2001 From: Oleksandr Natalenko Date: Thu, 1 May 2025 20:22:53 +0200 Subject: wifi: mac80211: mark copy_mesh_setup() as noinline diff --git a/debian/patches/patchset-pf/nfs/0001-NFSD-unregister-filesystem-in-case-genl_register_fam.patch b/debian/patches/patchset-pf/nfs/0001-NFSD-unregister-filesystem-in-case-genl_register_fam.patch new file mode 100644 index 0000000..febf497 --- /dev/null +++ b/debian/patches/patchset-pf/nfs/0001-NFSD-unregister-filesystem-in-case-genl_register_fam.patch @@ -0,0 +1,39 @@ +From c207229d3f7b851d246f1904bc4cab7ae9ada58b Mon Sep 17 00:00:00 2001 +From: Maninder Singh +Date: Thu, 6 Mar 2025 14:50:06 +0530 +Subject: NFSD: unregister filesystem in case genl_register_family() fails + +With rpc_status netlink support, unregister of register_filesystem() +was missed in case of genl_register_family() fails. 
+ +Correcting it by making new label. + +Fixes: bd9d6a3efa97 ("NFSD: add rpc_status netlink support") +Cc: stable@vger.kernel.org +Signed-off-by: Maninder Singh +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +--- + fs/nfsd/nfsctl.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/fs/nfsd/nfsctl.c ++++ b/fs/nfsd/nfsctl.c +@@ -2305,7 +2305,7 @@ static int __init init_nfsd(void) + goto out_free_cld; + retval = register_filesystem(&nfsd_fs_type); + if (retval) +- goto out_free_all; ++ goto out_free_nfsd4; + retval = genl_register_family(&nfsd_nl_family); + if (retval) + goto out_free_all; +@@ -2313,6 +2313,8 @@ static int __init init_nfsd(void) + + return 0; + out_free_all: ++ unregister_filesystem(&nfsd_fs_type); ++out_free_nfsd4: + nfsd4_destroy_laundry_wq(); + out_free_cld: + unregister_cld_notifier(); diff --git a/debian/patches/patchset-pf/nfs/0002-NFSD-fix-race-between-nfsd-registration-and-exports_.patch b/debian/patches/patchset-pf/nfs/0002-NFSD-fix-race-between-nfsd-registration-and-exports_.patch new file mode 100644 index 0000000..88d4282 --- /dev/null +++ b/debian/patches/patchset-pf/nfs/0002-NFSD-fix-race-between-nfsd-registration-and-exports_.patch @@ -0,0 +1,162 @@ +From bda3cf19bcf44807c401b807dee83aadda959287 Mon Sep 17 00:00:00 2001 +From: Maninder Singh +Date: Thu, 6 Mar 2025 14:50:07 +0530 +Subject: NFSD: fix race between nfsd registration and exports_proc + +As of now nfsd calls create_proc_exports_entry() at start of init_nfsd +and cleanup by remove_proc_entry() at last of exit_nfsd. + +Which causes kernel OOPs if there is race between below 2 operations: +(i) exportfs -r +(ii) mount -t nfsd none /proc/fs/nfsd + +for 5.4 kernel ARM64: + +CPU 1: +el1_irq+0xbc/0x180 +arch_counter_get_cntvct+0x14/0x18 +running_clock+0xc/0x18 +preempt_count_add+0x88/0x110 +prep_new_page+0xb0/0x220 +get_page_from_freelist+0x2d8/0x1778 +__alloc_pages_nodemask+0x15c/0xef0 +__vmalloc_node_range+0x28c/0x478 +__vmalloc_node_flags_caller+0x8c/0xb0 +kvmalloc_node+0x88/0xe0 +nfsd_init_net+0x6c/0x108 [nfsd] +ops_init+0x44/0x170 +register_pernet_operations+0x114/0x270 +register_pernet_subsys+0x34/0x50 +init_nfsd+0xa8/0x718 [nfsd] +do_one_initcall+0x54/0x2e0 + +CPU 2 : +Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 + +PC is at : exports_net_open+0x50/0x68 [nfsd] + +Call trace: +exports_net_open+0x50/0x68 [nfsd] +exports_proc_open+0x2c/0x38 [nfsd] +proc_reg_open+0xb8/0x198 +do_dentry_open+0x1c4/0x418 +vfs_open+0x38/0x48 +path_openat+0x28c/0xf18 +do_filp_open+0x70/0xe8 +do_sys_open+0x154/0x248 + +Sometimes it crashes at exports_net_open() and sometimes cache_seq_next_rcu(). + +and same is happening on latest 6.14 kernel as well: + +[ 0.000000] Linux version 6.14.0-rc5-next-20250304-dirty +... +[ 285.455918] Unable to handle kernel paging request at virtual address 00001f4800001f48 +... +[ 285.464902] pc : cache_seq_next_rcu+0x78/0xa4 +... 
+[ 285.469695] Call trace: +[ 285.470083] cache_seq_next_rcu+0x78/0xa4 (P) +[ 285.470488] seq_read+0xe0/0x11c +[ 285.470675] proc_reg_read+0x9c/0xf0 +[ 285.470874] vfs_read+0xc4/0x2fc +[ 285.471057] ksys_read+0x6c/0xf4 +[ 285.471231] __arm64_sys_read+0x1c/0x28 +[ 285.471428] invoke_syscall+0x44/0x100 +[ 285.471633] el0_svc_common.constprop.0+0x40/0xe0 +[ 285.471870] do_el0_svc_compat+0x1c/0x34 +[ 285.472073] el0_svc_compat+0x2c/0x80 +[ 285.472265] el0t_32_sync_handler+0x90/0x140 +[ 285.472473] el0t_32_sync+0x19c/0x1a0 +[ 285.472887] Code: f9400885 93407c23 937d7c27 11000421 (f86378a3) +[ 285.473422] ---[ end trace 0000000000000000 ]--- + +It reproduced simply with below script: +while [ 1 ] +do +/exportfs -r +done & + +while [ 1 ] +do +insmod /nfsd.ko +mount -t nfsd none /proc/fs/nfsd +umount /proc/fs/nfsd +rmmod nfsd +done & + +So exporting interfaces to user space shall be done at last and +cleanup at first place. + +With change there is no Kernel OOPs. + +Co-developed-by: Shubham Rana +Signed-off-by: Shubham Rana +Signed-off-by: Maninder Singh +Reviewed-by: Jeff Layton +Cc: stable@vger.kernel.org +Signed-off-by: Chuck Lever +--- + fs/nfsd/nfsctl.c | 17 ++++++++--------- + 1 file changed, 8 insertions(+), 9 deletions(-) + +--- a/fs/nfsd/nfsctl.c ++++ b/fs/nfsd/nfsctl.c +@@ -2291,12 +2291,9 @@ static int __init init_nfsd(void) + if (retval) + goto out_free_pnfs; + nfsd_lockd_init(); /* lockd->nfsd callbacks */ +- retval = create_proc_exports_entry(); +- if (retval) +- goto out_free_lockd; + retval = register_pernet_subsys(&nfsd_net_ops); + if (retval < 0) +- goto out_free_exports; ++ goto out_free_lockd; + retval = register_cld_notifier(); + if (retval) + goto out_free_subsys; +@@ -2308,11 +2305,16 @@ static int __init init_nfsd(void) + goto out_free_nfsd4; + retval = genl_register_family(&nfsd_nl_family); + if (retval) ++ goto out_free_filesystem; ++ retval = create_proc_exports_entry(); ++ if (retval) + goto out_free_all; + nfsd_localio_ops_init(); + + return 0; + out_free_all: ++ genl_unregister_family(&nfsd_nl_family); ++out_free_filesystem: + unregister_filesystem(&nfsd_fs_type); + out_free_nfsd4: + nfsd4_destroy_laundry_wq(); +@@ -2320,9 +2322,6 @@ out_free_cld: + unregister_cld_notifier(); + out_free_subsys: + unregister_pernet_subsys(&nfsd_net_ops); +-out_free_exports: +- remove_proc_entry("fs/nfs/exports", NULL); +- remove_proc_entry("fs/nfs", NULL); + out_free_lockd: + nfsd_lockd_shutdown(); + nfsd_drc_slab_free(); +@@ -2335,14 +2334,14 @@ out_free_slabs: + + static void __exit exit_nfsd(void) + { ++ remove_proc_entry("fs/nfs/exports", NULL); ++ remove_proc_entry("fs/nfs", NULL); + genl_unregister_family(&nfsd_nl_family); + unregister_filesystem(&nfsd_fs_type); + nfsd4_destroy_laundry_wq(); + unregister_cld_notifier(); + unregister_pernet_subsys(&nfsd_net_ops); + nfsd_drc_slab_free(); +- remove_proc_entry("fs/nfs/exports", NULL); +- remove_proc_entry("fs/nfs", NULL); + nfsd_lockd_shutdown(); + nfsd4_free_slabs(); + nfsd4_exit_pnfs(); diff --git a/debian/patches/patchset-pf/nfs/0003-nfsd-fix-access-checking-for-NLM-under-XPRTSEC-polic.patch b/debian/patches/patchset-pf/nfs/0003-nfsd-fix-access-checking-for-NLM-under-XPRTSEC-polic.patch new file mode 100644 index 0000000..789b1f7 --- /dev/null +++ b/debian/patches/patchset-pf/nfs/0003-nfsd-fix-access-checking-for-NLM-under-XPRTSEC-polic.patch @@ -0,0 +1,35 @@ +From b9293b51ea6182618e474edfbeb5cd34f5e875e8 Mon Sep 17 00:00:00 2001 +From: Olga Kornievskaia +Date: Fri, 21 Mar 2025 20:13:04 -0400 +Subject: nfsd: fix access checking 
for NLM under XPRTSEC policies + +When an export policy with xprtsec policy is set with "tls" +and/or "mtls", but an NFS client is doing a v3 xprtsec=tls +mount, then NLM locking calls fail with an error because +there is currently no support for NLM with TLS. + +Until such support is added, allow NLM calls under TLS-secured +policy. + +Fixes: 4cc9b9f2bf4d ("nfsd: refine and rename NFSD_MAY_LOCK") +Cc: stable@vger.kernel.org +Signed-off-by: Olga Kornievskaia +Reviewed-by: NeilBrown +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +--- + fs/nfsd/export.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/nfsd/export.c ++++ b/fs/nfsd/export.c +@@ -1124,7 +1124,8 @@ __be32 check_nfsd_access(struct svc_expo + test_bit(XPT_PEER_AUTH, &xprt->xpt_flags)) + goto ok; + } +- goto denied; ++ if (!may_bypass_gss) ++ goto denied; + + ok: + /* legacy gss-only clients are always OK: */ diff --git a/debian/patches/patchset-pf/nfs/0004-nfsd-nfsd4_spo_must_allow-must-check-this-is-a-v4-co.patch b/debian/patches/patchset-pf/nfs/0004-nfsd-nfsd4_spo_must_allow-must-check-this-is-a-v4-co.patch new file mode 100644 index 0000000..120ebf2 --- /dev/null +++ b/debian/patches/patchset-pf/nfs/0004-nfsd-nfsd4_spo_must_allow-must-check-this-is-a-v4-co.patch @@ -0,0 +1,32 @@ +From 778e820deed49a0dee6115c0aa903e626ab635f6 Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Fri, 28 Mar 2025 11:05:59 +1100 +Subject: nfsd: nfsd4_spo_must_allow() must check this is a v4 compound request + +If the request being processed is not a v4 compound request, then +examining the cstate can have undefined results. + +This patch adds a check that the rpc procedure being executed +(rq_procinfo) is the NFSPROC4_COMPOUND procedure. + +Reported-by: Olga Kornievskaia +Cc: stable@vger.kernel.org +Reviewed-by: Jeff Layton +Signed-off-by: NeilBrown +Signed-off-by: Chuck Lever +--- + fs/nfsd/nfs4proc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -3766,7 +3766,8 @@ bool nfsd4_spo_must_allow(struct svc_rqs + struct nfs4_op_map *allow = &cstate->clp->cl_spo_must_allow; + u32 opiter; + +- if (!cstate->minorversion) ++ if (rqstp->rq_procinfo != &nfsd_version4.vs_proc[NFSPROC4_COMPOUND] || ++ cstate->minorversion == 0) + return false; + + if (cstate->spo_must_allowed) diff --git a/debian/patches/patchset-pf/nfs/0005-nfsd-Initialize-ssc-before-laundromat_work-to-preven.patch b/debian/patches/patchset-pf/nfs/0005-nfsd-Initialize-ssc-before-laundromat_work-to-preven.patch new file mode 100644 index 0000000..0b9447f --- /dev/null +++ b/debian/patches/patchset-pf/nfs/0005-nfsd-Initialize-ssc-before-laundromat_work-to-preven.patch @@ -0,0 +1,47 @@ +From 8a7faf80fbb9ecdea403cb4f882354e8a5201acb Mon Sep 17 00:00:00 2001 +From: Li Lingfeng +Date: Mon, 14 Apr 2025 22:38:52 +0800 +Subject: nfsd: Initialize ssc before laundromat_work to prevent NULL + dereference + +In nfs4_state_start_net(), laundromat_work may access nfsd_ssc through +nfs4_laundromat -> nfsd4_ssc_expire_umount. If nfsd_ssc isn't initialized, +this can cause NULL pointer dereference. + +Normally the delayed start of laundromat_work allows sufficient time for +nfsd_ssc initialization to complete. However, when the kernel waits too +long for userspace responses (e.g. in nfs4_state_start_net -> +nfsd4_end_grace -> nfsd4_record_grace_done -> nfsd4_cld_grace_done -> +cld_pipe_upcall -> __cld_pipe_upcall -> wait_for_completion path), the +delayed work may start before nfsd_ssc initialization finishes. 
+ +Fix this by moving nfsd_ssc initialization before starting laundromat_work. + +Fixes: f4e44b393389 ("NFSD: delay unmount source's export after inter-server copy completed.") +Cc: stable@vger.kernel.org +Reviewed-by: Jeff Layton +Signed-off-by: Li Lingfeng +Signed-off-by: Chuck Lever +--- + fs/nfsd/nfssvc.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/fs/nfsd/nfssvc.c ++++ b/fs/nfsd/nfssvc.c +@@ -396,13 +396,13 @@ static int nfsd_startup_net(struct net * + if (ret) + goto out_filecache; + ++#ifdef CONFIG_NFSD_V4_2_INTER_SSC ++ nfsd4_ssc_init_umount_work(nn); ++#endif + ret = nfs4_state_start_net(net); + if (ret) + goto out_reply_cache; + +-#ifdef CONFIG_NFSD_V4_2_INTER_SSC +- nfsd4_ssc_init_umount_work(nn); +-#endif + nn->nfsd_net_up = true; + return 0; + diff --git a/debian/patches/patchset-pf/nfs/0006-NFSD-Implement-FATTR4_CLONE_BLKSIZE-attribute.patch b/debian/patches/patchset-pf/nfs/0006-NFSD-Implement-FATTR4_CLONE_BLKSIZE-attribute.patch new file mode 100644 index 0000000..4a8f0b2 --- /dev/null +++ b/debian/patches/patchset-pf/nfs/0006-NFSD-Implement-FATTR4_CLONE_BLKSIZE-attribute.patch @@ -0,0 +1,62 @@ +From 12e39177848d11c6ac5ad70ce530364fac7f36d3 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 7 May 2025 10:45:15 -0400 +Subject: NFSD: Implement FATTR4_CLONE_BLKSIZE attribute + +RFC 7862 states that if an NFS server implements a CLONE operation, +it MUST also implement FATTR4_CLONE_BLKSIZE. NFSD implements CLONE, +but does not implement FATTR4_CLONE_BLKSIZE. + +Note that in Section 12.2, RFC 7862 claims that +FATTR4_CLONE_BLKSIZE is RECOMMENDED, not REQUIRED. Likely this is +because a minor version is not permitted to add a REQUIRED +attribute. Confusing. + +We assume this attribute reports a block size as a count of bytes, +as RFC 7862 does not specify a unit. + +Reported-by: Roland Mainz +Suggested-by: Christoph Hellwig +Reviewed-by: Roland Mainz +Cc: stable@vger.kernel.org # v6.7+ +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +--- + fs/nfsd/nfs4xdr.c | 19 ++++++++++++++++++- + 1 file changed, 18 insertions(+), 1 deletion(-) + +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -3391,6 +3391,23 @@ static __be32 nfsd4_encode_fattr4_suppat + return nfsd4_encode_bitmap4(xdr, supp[0], supp[1], supp[2]); + } + ++/* ++ * Copied from generic_remap_checks/generic_remap_file_range_prep. ++ * ++ * These generic functions use the file system's s_blocksize, but ++ * individual file systems aren't required to use ++ * generic_remap_file_range_prep. Until there is a mechanism for ++ * determining a particular file system's (or file's) clone block ++ * size, this is the best NFSD can do. 
++ */ ++static __be32 nfsd4_encode_fattr4_clone_blksize(struct xdr_stream *xdr, ++ const struct nfsd4_fattr_args *args) ++{ ++ struct inode *inode = d_inode(args->dentry); ++ ++ return nfsd4_encode_uint32_t(xdr, inode->i_sb->s_blocksize); ++} ++ + #ifdef CONFIG_NFSD_V4_SECURITY_LABEL + static __be32 nfsd4_encode_fattr4_sec_label(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +@@ -3545,7 +3562,7 @@ static const nfsd4_enc_attr nfsd4_enc_fa + [FATTR4_MODE_SET_MASKED] = nfsd4_encode_fattr4__noop, + [FATTR4_SUPPATTR_EXCLCREAT] = nfsd4_encode_fattr4_suppattr_exclcreat, + [FATTR4_FS_CHARSET_CAP] = nfsd4_encode_fattr4__noop, +- [FATTR4_CLONE_BLKSIZE] = nfsd4_encode_fattr4__noop, ++ [FATTR4_CLONE_BLKSIZE] = nfsd4_encode_fattr4_clone_blksize, + [FATTR4_SPACE_FREED] = nfsd4_encode_fattr4__noop, + [FATTR4_CHANGE_ATTR_TYPE] = nfsd4_encode_fattr4__noop, + diff --git a/debian/patches/patchset-pf/nfs/0007-fs-nfs-read-fix-double-unlock-bug-in-nfs_return_empt.patch b/debian/patches/patchset-pf/nfs/0007-fs-nfs-read-fix-double-unlock-bug-in-nfs_return_empt.patch new file mode 100644 index 0000000..f7f17d2 --- /dev/null +++ b/debian/patches/patchset-pf/nfs/0007-fs-nfs-read-fix-double-unlock-bug-in-nfs_return_empt.patch @@ -0,0 +1,65 @@ +From 2623f0468759aba585c7ae86adc1cf1cb11e1b63 Mon Sep 17 00:00:00 2001 +From: Max Kellermann +Date: Wed, 23 Apr 2025 15:22:50 +0200 +Subject: fs/nfs/read: fix double-unlock bug in nfs_return_empty_folio() + +Sometimes, when a file was read while it was being truncated by +another NFS client, the kernel could deadlock because folio_unlock() +was called twice, and the second call would XOR back the `PG_locked` +flag. + +Most of the time (depending on the timing of the truncation), nobody +notices the problem because folio_unlock() gets called three times, +which flips `PG_locked` back off: + + 1. vfs_read, nfs_read_folio, ... nfs_read_add_folio, + nfs_return_empty_folio + 2. vfs_read, nfs_read_folio, ... netfs_read_collection, + netfs_unlock_abandoned_read_pages + 3. vfs_read, ... nfs_do_read_folio, nfs_read_add_folio, + nfs_return_empty_folio + +The problem is that nfs_read_add_folio() is not supposed to unlock the +folio if fscache is enabled, and a nfs_netfs_folio_unlock() check is +missing in nfs_return_empty_folio(). + +Rarely this leads to a warning in netfs_read_collection(): + + ------------[ cut here ]------------ + R=0000031c: folio 10 is not locked + WARNING: CPU: 0 PID: 29 at fs/netfs/read_collect.c:133 netfs_read_collection+0x7c0/0xf00 + [...] + Workqueue: events_unbound netfs_read_collection_worker + RIP: 0010:netfs_read_collection+0x7c0/0xf00 + [...] + Call Trace: + + netfs_read_collection_worker+0x67/0x80 + process_one_work+0x12e/0x2c0 + worker_thread+0x295/0x3a0 + +Most of the time, however, processes just get stuck forever in +folio_wait_bit_common(), waiting for `PG_locked` to disappear, which +never happens because nobody is really holding the folio lock. 
+ +Fixes: 000dbe0bec05 ("NFS: Convert buffered read paths to use netfs when fscache is enabled") +Cc: stable@vger.kernel.org +Signed-off-by: Max Kellermann +Reviewed-by: Dave Wysochanski +Signed-off-by: Anna Schumaker +--- + fs/nfs/read.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/nfs/read.c ++++ b/fs/nfs/read.c +@@ -56,7 +56,8 @@ static int nfs_return_empty_folio(struct + { + folio_zero_segment(folio, 0, folio_size(folio)); + folio_mark_uptodate(folio); +- folio_unlock(folio); ++ if (nfs_netfs_folio_unlock(folio)) ++ folio_unlock(folio); + return 0; + } + diff --git a/debian/patches/patchset-pf/nfs/0008-NFSv4-Don-t-check-for-OPEN-feature-support-in-v4.1.patch b/debian/patches/patchset-pf/nfs/0008-NFSv4-Don-t-check-for-OPEN-feature-support-in-v4.1.patch new file mode 100644 index 0000000..ec9ddff --- /dev/null +++ b/debian/patches/patchset-pf/nfs/0008-NFSv4-Don-t-check-for-OPEN-feature-support-in-v4.1.patch @@ -0,0 +1,32 @@ +From d87e5957afccde6cc0719ab0a554757dcafa85ce Mon Sep 17 00:00:00 2001 +From: Scott Mayhew +Date: Wed, 30 Apr 2025 07:12:29 -0400 +Subject: NFSv4: Don't check for OPEN feature support in v4.1 + +fattr4_open_arguments is a v4.2 recommended attribute, so we shouldn't +be sending it to v4.1 servers. + +Fixes: cb78f9b7d0c0 ("nfs: fix the fetch of FATTR4_OPEN_ARGUMENTS") +Signed-off-by: Scott Mayhew +Reviewed-by: Jeff Layton +Reviewed-by: Benjamin Coddington +Cc: stable@vger.kernel.org # 6.11+ +Signed-off-by: Anna Schumaker +--- + fs/nfs/nfs4proc.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -3976,8 +3976,9 @@ static int _nfs4_server_capabilities(str + FATTR4_WORD0_CASE_INSENSITIVE | + FATTR4_WORD0_CASE_PRESERVING; + if (minorversion) +- bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT | +- FATTR4_WORD2_OPEN_ARGUMENTS; ++ bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT; ++ if (minorversion > 1) ++ bitmask[2] |= FATTR4_WORD2_OPEN_ARGUMENTS; + + status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); + if (status == 0) { diff --git a/debian/patches/patchset-pf/nfs/0009-NFS-always-probe-for-LOCALIO-support-asynchronously.patch b/debian/patches/patchset-pf/nfs/0009-NFS-always-probe-for-LOCALIO-support-asynchronously.patch new file mode 100644 index 0000000..aa21220 --- /dev/null +++ b/debian/patches/patchset-pf/nfs/0009-NFS-always-probe-for-LOCALIO-support-asynchronously.patch @@ -0,0 +1,96 @@ +From 9e7464ef730cfe5bbab845ff12b295575d874216 Mon Sep 17 00:00:00 2001 +From: Mike Snitzer +Date: Tue, 13 May 2025 12:08:31 -0400 +Subject: NFS: always probe for LOCALIO support asynchronously + +It was reported that NFS client mounts of AWS Elastic File System +(EFS) volumes is slow, this is because the AWS firewall disallows +LOCALIO (because it doesn't consider the use of NFS_LOCALIO_PROGRAM +valid), see: https://bugzilla.redhat.com/show_bug.cgi?id=2335129 + +Switch to performing the LOCALIO probe asynchronously to address the +potential for the NFS LOCALIO protocol being disallowed and/or slowed +by the remote server's response. + +While at it, fix nfs_local_probe_async() to always take/put a +reference on the nfs_client that is using the LOCALIO protocol. +Also, unexport the nfs_local_probe() symbol and make it private to +fs/nfs/localio.c + +This change has the side-effect of initially issuing reads, writes and +commits over the wire via SUNRPC until the LOCALIO probe completes. 
+ +Suggested-by: Jeff Layton # to always probe async +Fixes: 76d4cb6345da ("nfs: probe for LOCALIO when v4 client reconnects to server") +Cc: stable@vger.kernel.org # 6.14+ +Signed-off-by: Mike Snitzer +Reviewed-by: Jeff Layton +Signed-off-by: Anna Schumaker +--- + fs/nfs/client.c | 2 +- + fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +- + fs/nfs/internal.h | 1 - + fs/nfs/localio.c | 6 ++++-- + 4 files changed, 6 insertions(+), 5 deletions(-) + +--- a/fs/nfs/client.c ++++ b/fs/nfs/client.c +@@ -439,7 +439,7 @@ struct nfs_client *nfs_get_client(const + spin_unlock(&nn->nfs_client_lock); + new = rpc_ops->init_client(new, cl_init); + if (!IS_ERR(new)) +- nfs_local_probe(new); ++ nfs_local_probe_async(new); + return new; + } + +--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c ++++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c +@@ -400,7 +400,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_la + * keep ds_clp even if DS is local, so that if local IO cannot + * proceed somehow, we can fall back to NFS whenever we want. + */ +- nfs_local_probe(ds->ds_clp); ++ nfs_local_probe_async(ds->ds_clp); + max_payload = + nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), + NULL); +--- a/fs/nfs/internal.h ++++ b/fs/nfs/internal.h +@@ -455,7 +455,6 @@ extern int nfs_wait_bit_killable(struct + + #if IS_ENABLED(CONFIG_NFS_LOCALIO) + /* localio.c */ +-extern void nfs_local_probe(struct nfs_client *); + extern void nfs_local_probe_async(struct nfs_client *); + extern void nfs_local_probe_async_work(struct work_struct *); + extern struct nfsd_file *nfs_local_open_fh(struct nfs_client *, +--- a/fs/nfs/localio.c ++++ b/fs/nfs/localio.c +@@ -171,7 +171,7 @@ static bool nfs_server_uuid_is_local(str + * - called after alloc_client and init_client (so cl_rpcclient exists) + * - this function is idempotent, it can be called for old or new clients + */ +-void nfs_local_probe(struct nfs_client *clp) ++static void nfs_local_probe(struct nfs_client *clp) + { + /* Disallow localio if disabled via sysfs or AUTH_SYS isn't used */ + if (!localio_enabled || +@@ -191,14 +191,16 @@ void nfs_local_probe(struct nfs_client * + nfs_localio_enable_client(clp); + nfs_uuid_end(&clp->cl_uuid); + } +-EXPORT_SYMBOL_GPL(nfs_local_probe); + + void nfs_local_probe_async_work(struct work_struct *work) + { + struct nfs_client *clp = + container_of(work, struct nfs_client, cl_local_probe_work); + ++ if (!refcount_inc_not_zero(&clp->cl_count)) ++ return; + nfs_local_probe(clp); ++ nfs_put_client(clp); + } + + void nfs_local_probe_async(struct nfs_client *clp) diff --git a/debian/patches/patchset-pf/smb/0001-smb-client-add-NULL-check-in-automount_fullpath.patch b/debian/patches/patchset-pf/smb/0001-smb-client-add-NULL-check-in-automount_fullpath.patch new file mode 100644 index 0000000..b6f5185 --- /dev/null +++ b/debian/patches/patchset-pf/smb/0001-smb-client-add-NULL-check-in-automount_fullpath.patch @@ -0,0 +1,29 @@ +From 97831e31e43bb023d208b2344546a4e51e580dc6 Mon Sep 17 00:00:00 2001 +From: Ruben Devos +Date: Sun, 1 Jun 2025 19:18:55 +0200 +Subject: smb: client: add NULL check in automount_fullpath + +page is checked for null in __build_path_from_dentry_optional_prefix +when tcon->origin_fullpath is not set. However, the check is missing when +it is set. +Add a check to prevent a potential NULL pointer dereference. 
+ +Signed-off-by: Ruben Devos +Cc: stable@vger.kernel.org +Signed-off-by: Steve French +--- + fs/smb/client/namespace.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/smb/client/namespace.c ++++ b/fs/smb/client/namespace.c +@@ -146,6 +146,9 @@ static char *automount_fullpath(struct d + } + spin_unlock(&tcon->tc_lock); + ++ if (unlikely(!page)) ++ return ERR_PTR(-ENOMEM); ++ + s = dentry_path_raw(dentry, page, PATH_MAX); + if (IS_ERR(s)) + return s; diff --git a/debian/patches/patchset-pf/smb/0002-cifs-reset-connections-for-all-channels-when-reconne.patch b/debian/patches/patchset-pf/smb/0002-cifs-reset-connections-for-all-channels-when-reconne.patch new file mode 100644 index 0000000..f75147c --- /dev/null +++ b/debian/patches/patchset-pf/smb/0002-cifs-reset-connections-for-all-channels-when-reconne.patch @@ -0,0 +1,39 @@ +From 0ca6d39b6d40b868eb6b4021f918de7a0f6a0f2e Mon Sep 17 00:00:00 2001 +From: Shyam Prasad N +Date: Mon, 2 Jun 2025 22:37:13 +0530 +Subject: cifs: reset connections for all channels when reconnect requested + +cifs_reconnect can be called with a flag to mark the session as needing +reconnect too. When this is done, we expect the connections of all +channels to be reconnected too, which is not happening today. + +Without doing this, we have seen bad things happen when primary and +secondary channels are connected to different servers (in case of cloud +services like Azure Files SMB). + +This change would force all connections to reconnect as well, not just +the sessions and tcons. + +Cc: +Signed-off-by: Shyam Prasad N +Signed-off-by: Steve French +--- + fs/smb/client/connect.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/fs/smb/client/connect.c ++++ b/fs/smb/client/connect.c +@@ -377,6 +377,13 @@ static int __cifs_reconnect(struct TCP_S + if (!cifs_tcp_ses_needs_reconnect(server, 1)) + return 0; + ++ /* ++ * if smb session has been marked for reconnect, also reconnect all ++ * connections. This way, the other connections do not end up bad. ++ */ ++ if (mark_smb_session) ++ cifs_signal_cifsd_for_reconnect(server, mark_smb_session); ++ + cifs_mark_tcp_ses_conns_for_reconnect(server, mark_smb_session); + + cifs_abort_connection(server); diff --git a/debian/patches/patchset-pf/smb/0003-cifs-update-dstaddr-whenever-channel-iface-is-update.patch b/debian/patches/patchset-pf/smb/0003-cifs-update-dstaddr-whenever-channel-iface-is-update.patch new file mode 100644 index 0000000..23b12a7 --- /dev/null +++ b/debian/patches/patchset-pf/smb/0003-cifs-update-dstaddr-whenever-channel-iface-is-update.patch @@ -0,0 +1,31 @@ +From d1f84c6baebc480106c9558dea4842ecb3059017 Mon Sep 17 00:00:00 2001 +From: Shyam Prasad N +Date: Mon, 2 Jun 2025 22:37:14 +0530 +Subject: cifs: update dstaddr whenever channel iface is updated + +When the server interface info changes (more common in clustered +servers like Azure Files), the per-channel iface gets updated. +However, this did not update the corresponding dstaddr. As a result +these channels will still connect (or try connecting) to older addresses. 
+ +Fixes: b54034a73baf ("cifs: during reconnect, update interface if necessary") +Cc: +Signed-off-by: Shyam Prasad N +Signed-off-by: Steve French +--- + fs/smb/client/sess.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/fs/smb/client/sess.c ++++ b/fs/smb/client/sess.c +@@ -445,6 +445,10 @@ cifs_chan_update_iface(struct cifs_ses * + + ses->chans[chan_index].iface = iface; + spin_unlock(&ses->chan_lock); ++ ++ spin_lock(&server->srv_lock); ++ memcpy(&server->dstaddr, &iface->sockaddr, sizeof(server->dstaddr)); ++ spin_unlock(&server->srv_lock); + } + + static int diff --git a/debian/patches/patchset-pf/smb/0004-cifs-dns-resolution-is-needed-only-for-primary-chann.patch b/debian/patches/patchset-pf/smb/0004-cifs-dns-resolution-is-needed-only-for-primary-chann.patch new file mode 100644 index 0000000..99c65dc --- /dev/null +++ b/debian/patches/patchset-pf/smb/0004-cifs-dns-resolution-is-needed-only-for-primary-chann.patch @@ -0,0 +1,33 @@ +From 2bffd71a70fa4695f62712688a720393cc92032b Mon Sep 17 00:00:00 2001 +From: Shyam Prasad N +Date: Mon, 2 Jun 2025 22:37:16 +0530 +Subject: cifs: dns resolution is needed only for primary channel + +When calling cifs_reconnect, before the connection to the +server is reestablished, the code today does a DNS resolution and +updates server->dstaddr. + +However, this is not necessary for secondary channels. Secondary +channels use the interface list returned by the server to decide +which address to connect to. And that happens after tcon is reconnected +and server interfaces are requested. + +Signed-off-by: Shyam Prasad N +Cc: stable@vger.kernel.org +Signed-off-by: Steve French +--- + fs/smb/client/connect.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/smb/client/connect.c ++++ b/fs/smb/client/connect.c +@@ -392,7 +392,8 @@ static int __cifs_reconnect(struct TCP_S + try_to_freeze(); + cifs_server_lock(server); + +- if (!cifs_swn_set_server_dstaddr(server)) { ++ if (!cifs_swn_set_server_dstaddr(server) && ++ !SERVER_IS_CHAN(server)) { + /* resolve the hostname again to make sure that IP address is up-to-date */ + rc = reconn_set_ipaddr_from_hostname(server); + cifs_dbg(FYI, "%s: reconn_set_ipaddr_from_hostname: rc=%d\n", __func__, rc); diff --git a/debian/patches/patchset-pf/smb/0005-cifs-deal-with-the-channel-loading-lag-while-picking.patch b/debian/patches/patchset-pf/smb/0005-cifs-deal-with-the-channel-loading-lag-while-picking.patch new file mode 100644 index 0000000..a824997 --- /dev/null +++ b/debian/patches/patchset-pf/smb/0005-cifs-deal-with-the-channel-loading-lag-while-picking.patch @@ -0,0 +1,73 @@ +From 918f494c058028cee8bdff33a4aa613377da61f0 Mon Sep 17 00:00:00 2001 +From: Shyam Prasad N +Date: Mon, 2 Jun 2025 22:37:12 +0530 +Subject: cifs: deal with the channel loading lag while picking channels + +Our current approach to select a channel for sending requests is this: +1. iterate all channels to find the min and max queue depth +2. if min and max are not the same, pick the channel with min depth +3. if min and max are same, round robin, as all channels are equally loaded + +The problem with this approach is that there's a lag between selecting +a channel and sending the request (that increases the queue depth on the channel). +While these numbers will eventually catch up, there could be a skew in the +channel usage, depending on the application's I/O parallelism and the server's +speed of handling requests. 
+ +With sufficient parallelism, this lag can artificially increase the queue depth, +thereby impacting the performance negatively. + +This change will change the step 1 above to start the iteration from the last +selected channel. This is to reduce the skew in channel usage even in the presence +of this lag. + +Fixes: ea90708d3cf3 ("cifs: use the least loaded channel for sending requests") +Cc: +Signed-off-by: Shyam Prasad N +Signed-off-by: Steve French +--- + fs/smb/client/transport.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/fs/smb/client/transport.c ++++ b/fs/smb/client/transport.c +@@ -1018,14 +1018,16 @@ struct TCP_Server_Info *cifs_pick_channe + uint index = 0; + unsigned int min_in_flight = UINT_MAX, max_in_flight = 0; + struct TCP_Server_Info *server = NULL; +- int i; ++ int i, start, cur; + + if (!ses) + return NULL; + + spin_lock(&ses->chan_lock); ++ start = atomic_inc_return(&ses->chan_seq); + for (i = 0; i < ses->chan_count; i++) { +- server = ses->chans[i].server; ++ cur = (start + i) % ses->chan_count; ++ server = ses->chans[cur].server; + if (!server || server->terminate) + continue; + +@@ -1042,17 +1044,15 @@ struct TCP_Server_Info *cifs_pick_channe + */ + if (server->in_flight < min_in_flight) { + min_in_flight = server->in_flight; +- index = i; ++ index = cur; + } + if (server->in_flight > max_in_flight) + max_in_flight = server->in_flight; + } + + /* if all channels are equally loaded, fall back to round-robin */ +- if (min_in_flight == max_in_flight) { +- index = (uint)atomic_inc_return(&ses->chan_seq); +- index %= ses->chan_count; +- } ++ if (min_in_flight == max_in_flight) ++ index = (uint)start % ses->chan_count; + + server = ses->chans[index].server; + spin_unlock(&ses->chan_lock); diff --git a/debian/patches/patchset-pf/smb/0006-cifs-serialize-other-channels-when-query-server-inte.patch b/debian/patches/patchset-pf/smb/0006-cifs-serialize-other-channels-when-query-server-inte.patch new file mode 100644 index 0000000..2afe01e --- /dev/null +++ b/debian/patches/patchset-pf/smb/0006-cifs-serialize-other-channels-when-query-server-inte.patch @@ -0,0 +1,82 @@ +From 2cc6528030c91406031698e047896faa99fc0092 Mon Sep 17 00:00:00 2001 +From: Shyam Prasad N +Date: Mon, 2 Jun 2025 22:37:15 +0530 +Subject: cifs: serialize other channels when query server interfaces is + pending + +Today, during smb2_reconnect, session_mutex is released as soon as +the tcon is reconnected and is in a good state. However, in case +multichannel is enabled, there is also a query of server interfaces that +follows. We've seen that this query can race with reconnects of other +channels, causing them to step on each other with reconnects. + +This change extends the hold of session_mutex till after the query of +server interfaces is complete. In order to avoid recursive smb2_reconnect +checks during query ioctl, this change also introduces a session flag +for sessions where such a query is in progress. + +Signed-off-by: Shyam Prasad N +Cc: stable@vger.kernel.org +Signed-off-by: Steve French +--- + fs/smb/client/cifsglob.h | 1 + + fs/smb/client/smb2pdu.c | 24 ++++++++++++++++++------ + 2 files changed, 19 insertions(+), 6 deletions(-) + +--- a/fs/smb/client/cifsglob.h ++++ b/fs/smb/client/cifsglob.h +@@ -1084,6 +1084,7 @@ struct cifs_chan { + }; + + #define CIFS_SES_FLAG_SCALE_CHANNELS (0x1) ++#define CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES (0x2) + + /* + * Session structure. 
One of these for each uid session with a particular host +--- a/fs/smb/client/smb2pdu.c ++++ b/fs/smb/client/smb2pdu.c +@@ -411,14 +411,19 @@ skip_sess_setup: + if (!rc && + (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL) && + server->ops->query_server_interfaces) { +- mutex_unlock(&ses->session_mutex); +- + /* +- * query server network interfaces, in case they change ++ * query server network interfaces, in case they change. ++ * Also mark the session as pending this update while the query ++ * is in progress. This will be used to avoid calling ++ * smb2_reconnect recursively. + */ ++ ses->flags |= CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES; + xid = get_xid(); + rc = server->ops->query_server_interfaces(xid, tcon, false); + free_xid(xid); ++ ses->flags &= ~CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES; ++ ++ mutex_unlock(&ses->session_mutex); + + if (rc == -EOPNOTSUPP && ses->chan_count > 1) { + /* +@@ -560,11 +565,18 @@ static int smb2_ioctl_req_init(u32 opcod + struct TCP_Server_Info *server, + void **request_buf, unsigned int *total_len) + { +- /* Skip reconnect only for FSCTL_VALIDATE_NEGOTIATE_INFO IOCTLs */ +- if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO) { ++ /* ++ * Skip reconnect in one of the following cases: ++ * 1. For FSCTL_VALIDATE_NEGOTIATE_INFO IOCTLs ++ * 2. For FSCTL_QUERY_NETWORK_INTERFACE_INFO IOCTL when called from ++ * smb2_reconnect (indicated by CIFS_SES_FLAG_SCALE_CHANNELS ses flag) ++ */ ++ if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO || ++ (opcode == FSCTL_QUERY_NETWORK_INTERFACE_INFO && ++ (tcon->ses->flags & CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES))) + return __smb2_plain_req_init(SMB2_IOCTL, tcon, server, + request_buf, total_len); +- } ++ + return smb2_plain_req_init(SMB2_IOCTL, tcon, server, + request_buf, total_len); + } diff --git a/debian/patches/patchset-pf/smb/0007-cifs-do-not-disable-interface-polling-on-failure.patch b/debian/patches/patchset-pf/smb/0007-cifs-do-not-disable-interface-polling-on-failure.patch new file mode 100644 index 0000000..2e6535f --- /dev/null +++ b/debian/patches/patchset-pf/smb/0007-cifs-do-not-disable-interface-polling-on-failure.patch @@ -0,0 +1,64 @@ +From 48fd713e7c35aba7a4c3ed327977897909575e3e Mon Sep 17 00:00:00 2001 +From: Shyam Prasad N +Date: Mon, 2 Jun 2025 22:37:17 +0530 +Subject: cifs: do not disable interface polling on failure + +When a server has multichannel enabled, we keep polling the server +for interfaces periodically. However, when this query fails, we +disable the polling. This can be problematic as it takes away the +chance for the server to start advertizing again. + +This change reschedules the delayed work, even if the current call +failed. That way, multichannel sessions can recover. 
+ +Signed-off-by: Shyam Prasad N +Cc: stable@vger.kernel.org +Signed-off-by: Steve French +--- + fs/smb/client/connect.c | 6 +----- + fs/smb/client/smb2pdu.c | 9 +++++---- + 2 files changed, 6 insertions(+), 9 deletions(-) + +--- a/fs/smb/client/connect.c ++++ b/fs/smb/client/connect.c +@@ -116,13 +116,9 @@ static void smb2_query_server_interfaces + rc = server->ops->query_server_interfaces(xid, tcon, false); + free_xid(xid); + +- if (rc) { +- if (rc == -EOPNOTSUPP) +- return; +- ++ if (rc) + cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n", + __func__, rc); +- } + + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); +--- a/fs/smb/client/smb2pdu.c ++++ b/fs/smb/client/smb2pdu.c +@@ -423,6 +423,10 @@ skip_sess_setup: + free_xid(xid); + ses->flags &= ~CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES; + ++ /* regardless of rc value, setup polling */ ++ queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, ++ (SMB_INTERFACE_POLL_INTERVAL * HZ)); ++ + mutex_unlock(&ses->session_mutex); + + if (rc == -EOPNOTSUPP && ses->chan_count > 1) { +@@ -443,11 +447,8 @@ skip_sess_setup: + if (ses->chan_max > ses->chan_count && + ses->iface_count && + !SERVER_IS_CHAN(server)) { +- if (ses->chan_count == 1) { ++ if (ses->chan_count == 1) + cifs_server_dbg(VFS, "supports multichannel now\n"); +- queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, +- (SMB_INTERFACE_POLL_INTERVAL * HZ)); +- } + + cifs_try_adding_channels(ses); + } diff --git a/debian/patches/patchset-pf/smb/0008-smb-improve-directory-cache-reuse-for-readdir-operat.patch b/debian/patches/patchset-pf/smb/0008-smb-improve-directory-cache-reuse-for-readdir-operat.patch new file mode 100644 index 0000000..355bd11 --- /dev/null +++ b/debian/patches/patchset-pf/smb/0008-smb-improve-directory-cache-reuse-for-readdir-operat.patch @@ -0,0 +1,148 @@ +From 17457c5d0fa0b98cef9d2236a1518b1ded25fa5d Mon Sep 17 00:00:00 2001 +From: Bharath SM +Date: Wed, 11 Jun 2025 16:59:02 +0530 +Subject: smb: improve directory cache reuse for readdir operations + +Currently, cached directory contents were not reused across subsequent +'ls' operations because the cache validity check relied on comparing +the ctx pointer, which changes with each readdir invocation. As a +result, the cached dir entries was not marked as valid and the cache was +not utilized for subsequent 'ls' operations. + +This change uses the file pointer, which remains consistent across all +readdir calls for a given directory instance, to associate and validate +the cache. As a result, cached directory contents can now be +correctly reused, improving performance for repeated directory listings. + +Performance gains with local windows SMB server: + +Without the patch and default actimeo=1: + 1000 directory enumeration operations on dir with 10k files took 135.0s + +With this patch and actimeo=0: + 1000 directory enumeration operations on dir with 10k files took just 5.1s + +Signed-off-by: Bharath SM +Reviewed-by: Shyam Prasad N +Cc: stable@vger.kernel.org +Signed-off-by: Steve French +--- + fs/smb/client/cached_dir.h | 8 ++++---- + fs/smb/client/readdir.c | 28 +++++++++++++++------------- + 2 files changed, 19 insertions(+), 17 deletions(-) + +--- a/fs/smb/client/cached_dir.h ++++ b/fs/smb/client/cached_dir.h +@@ -21,10 +21,10 @@ struct cached_dirent { + struct cached_dirents { + bool is_valid:1; + bool is_failed:1; +- struct dir_context *ctx; /* +- * Only used to make sure we only take entries +- * from a single context. Never dereferenced. 
+- */ ++ struct file *file; /* ++ * Used to associate the cache with a single ++ * open file instance. ++ */ + struct mutex de_mutex; + int pos; /* Expected ctx->pos */ + struct list_head entries; +--- a/fs/smb/client/readdir.c ++++ b/fs/smb/client/readdir.c +@@ -850,9 +850,9 @@ static bool emit_cached_dirents(struct c + } + + static void update_cached_dirents_count(struct cached_dirents *cde, +- struct dir_context *ctx) ++ struct file *file) + { +- if (cde->ctx != ctx) ++ if (cde->file != file) + return; + if (cde->is_valid || cde->is_failed) + return; +@@ -861,9 +861,9 @@ static void update_cached_dirents_count( + } + + static void finished_cached_dirents_count(struct cached_dirents *cde, +- struct dir_context *ctx) ++ struct dir_context *ctx, struct file *file) + { +- if (cde->ctx != ctx) ++ if (cde->file != file) + return; + if (cde->is_valid || cde->is_failed) + return; +@@ -876,11 +876,12 @@ static void finished_cached_dirents_coun + static void add_cached_dirent(struct cached_dirents *cde, + struct dir_context *ctx, + const char *name, int namelen, +- struct cifs_fattr *fattr) ++ struct cifs_fattr *fattr, ++ struct file *file) + { + struct cached_dirent *de; + +- if (cde->ctx != ctx) ++ if (cde->file != file) + return; + if (cde->is_valid || cde->is_failed) + return; +@@ -910,7 +911,8 @@ static void add_cached_dirent(struct cac + static bool cifs_dir_emit(struct dir_context *ctx, + const char *name, int namelen, + struct cifs_fattr *fattr, +- struct cached_fid *cfid) ++ struct cached_fid *cfid, ++ struct file *file) + { + bool rc; + ino_t ino = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); +@@ -922,7 +924,7 @@ static bool cifs_dir_emit(struct dir_con + if (cfid) { + mutex_lock(&cfid->dirents.de_mutex); + add_cached_dirent(&cfid->dirents, ctx, name, namelen, +- fattr); ++ fattr, file); + mutex_unlock(&cfid->dirents.de_mutex); + } + +@@ -1022,7 +1024,7 @@ static int cifs_filldir(char *find_entry + cifs_prime_dcache(file_dentry(file), &name, &fattr); + + return !cifs_dir_emit(ctx, name.name, name.len, +- &fattr, cfid); ++ &fattr, cfid, file); + } + + +@@ -1073,8 +1075,8 @@ int cifs_readdir(struct file *file, stru + * we need to initialize scanning and storing the + * directory content. 
+ */ +- if (ctx->pos == 0 && cfid->dirents.ctx == NULL) { +- cfid->dirents.ctx = ctx; ++ if (ctx->pos == 0 && cfid->dirents.file == NULL) { ++ cfid->dirents.file = file; + cfid->dirents.pos = 2; + } + /* +@@ -1142,7 +1144,7 @@ int cifs_readdir(struct file *file, stru + } else { + if (cfid) { + mutex_lock(&cfid->dirents.de_mutex); +- finished_cached_dirents_count(&cfid->dirents, ctx); ++ finished_cached_dirents_count(&cfid->dirents, ctx, file); + mutex_unlock(&cfid->dirents.de_mutex); + } + cifs_dbg(FYI, "Could not find entry\n"); +@@ -1183,7 +1185,7 @@ int cifs_readdir(struct file *file, stru + ctx->pos++; + if (cfid) { + mutex_lock(&cfid->dirents.de_mutex); +- update_cached_dirents_count(&cfid->dirents, ctx); ++ update_cached_dirents_count(&cfid->dirents, file); + mutex_unlock(&cfid->dirents.de_mutex); + } + diff --git a/debian/patches/patchset-pf/xfs/0001-xfs-don-t-assume-perags-are-initialised-when-trimmin.patch b/debian/patches/patchset-pf/xfs/0001-xfs-don-t-assume-perags-are-initialised-when-trimmin.patch new file mode 100644 index 0000000..96e8f4f --- /dev/null +++ b/debian/patches/patchset-pf/xfs/0001-xfs-don-t-assume-perags-are-initialised-when-trimmin.patch @@ -0,0 +1,81 @@ +From c63d4a0865e8e7549e1305cc67b88a355a4a9a02 Mon Sep 17 00:00:00 2001 +From: Dave Chinner +Date: Thu, 1 May 2025 09:27:24 +1000 +Subject: xfs: don't assume perags are initialised when trimming AGs + +When running fstrim immediately after mounting a V4 filesystem, +the fstrim fails to trim all the free space in the filesystem. It +only trims the first extent in the by-size free space tree in each +AG and then returns. If a second fstrim is then run, it runs +correctly and the entire free space in the filesystem is iterated +and discarded correctly. + +The problem lies in the setup of the trim cursor - it assumes that +pag->pagf_longest is valid without either reading the AGF first or +checking if xfs_perag_initialised_agf(pag) is true or not. + +As a result, when a filesystem is mounted without reading the AGF +(e.g. a clean mount on a v4 filesystem) and the first operation is a +fstrim call, pag->pagf_longest is zero and so the free extent search +starts at the wrong end of the by-size btree and exits after +discarding the first record in the tree. + +Fix this by deferring the initialisation of tcur->count to after +we have locked the AGF and guaranteed that the perag is properly +initialised. We trigger this on tcur->count == 0 after locking the +AGF, as this will only occur on the first call to +xfs_trim_gather_extents() for each AG. If we need to iterate, +tcur->count will be set to the length of the record we need to +restart at, so we can use this to ensure we only sample a valid +pag->pagf_longest value for the iteration. + +Signed-off-by: Dave Chinner +Reviewed-by: Bill O'Donnell +Reviewed-by: Darrick J. Wong +Fixes: 89cfa899608f ("xfs: reduce AGF hold times during fstrim operations") +Cc: # v6.6 +Signed-off-by: Carlos Maiolino +--- + fs/xfs/xfs_discard.c | 17 ++++++++++++++++- + 1 file changed, 16 insertions(+), 1 deletion(-) + +--- a/fs/xfs/xfs_discard.c ++++ b/fs/xfs/xfs_discard.c +@@ -167,6 +167,14 @@ xfs_discard_extents( + return error; + } + ++/* ++ * Care must be taken setting up the trim cursor as the perags may not have been ++ * initialised when the cursor is initialised. e.g. a clean mount which hasn't ++ * read in AGFs and the first operation run on the mounted fs is a trim. 
This ++ * can result in perag fields that aren't initialised until ++ * xfs_trim_gather_extents() calls xfs_alloc_read_agf() to lock down the AG for ++ * the free space search. ++ */ + struct xfs_trim_cur { + xfs_agblock_t start; + xfs_extlen_t count; +@@ -204,6 +212,14 @@ xfs_trim_gather_extents( + if (error) + goto out_trans_cancel; + ++ /* ++ * First time through tcur->count will not have been initialised as ++ * pag->pagf_longest is not guaranteed to be valid before we read ++ * the AGF buffer above. ++ */ ++ if (!tcur->count) ++ tcur->count = pag->pagf_longest; ++ + if (tcur->by_bno) { + /* sub-AG discard request always starts at tcur->start */ + cur = xfs_bnobt_init_cursor(mp, tp, agbp, pag); +@@ -350,7 +366,6 @@ xfs_trim_perag_extents( + { + struct xfs_trim_cur tcur = { + .start = start, +- .count = pag->pagf_longest, + .end = end, + .minlen = minlen, + }; diff --git a/debian/patches/patchset-pf/zstd/0001-zstd-import-upstream-v1.5.7.patch b/debian/patches/patchset-pf/zstd/0001-zstd-import-upstream-v1.5.7.patch deleted file mode 100644 index b155007..0000000 --- a/debian/patches/patchset-pf/zstd/0001-zstd-import-upstream-v1.5.7.patch +++ /dev/null @@ -1,23402 +0,0 @@ -From b0d4b9d688216e91afc7e48348686827bd7b2bb1 Mon Sep 17 00:00:00 2001 -From: Oleksandr Natalenko -Date: Thu, 20 Feb 2025 09:03:32 +0100 -Subject: zstd: import upstream v1.5.7 - -Signed-off-by: Oleksandr Natalenko ---- - include/linux/zstd.h | 86 +- - include/linux/zstd_errors.h | 30 +- - include/linux/zstd_lib.h | 1123 ++++-- - lib/zstd/Makefile | 3 +- - lib/zstd/common/allocations.h | 56 + - lib/zstd/common/bits.h | 150 + - lib/zstd/common/bitstream.h | 155 +- - lib/zstd/common/compiler.h | 151 +- - lib/zstd/common/cpu.h | 3 +- - lib/zstd/common/debug.c | 9 +- - lib/zstd/common/debug.h | 37 +- - lib/zstd/common/entropy_common.c | 42 +- - lib/zstd/common/error_private.c | 13 +- - lib/zstd/common/error_private.h | 88 +- - lib/zstd/common/fse.h | 103 +- - lib/zstd/common/fse_decompress.c | 132 +- - lib/zstd/common/huf.h | 240 +- - lib/zstd/common/mem.h | 3 +- - lib/zstd/common/portability_macros.h | 45 +- - lib/zstd/common/zstd_common.c | 38 +- - lib/zstd/common/zstd_deps.h | 16 +- - lib/zstd/common/zstd_internal.h | 153 +- - lib/zstd/compress/clevels.h | 3 +- - lib/zstd/compress/fse_compress.c | 74 +- - lib/zstd/compress/hist.c | 13 +- - lib/zstd/compress/hist.h | 10 +- - lib/zstd/compress/huf_compress.c | 441 ++- - lib/zstd/compress/zstd_compress.c | 3289 ++++++++++++----- - lib/zstd/compress/zstd_compress_internal.h | 621 +++- - lib/zstd/compress/zstd_compress_literals.c | 157 +- - lib/zstd/compress/zstd_compress_literals.h | 25 +- - lib/zstd/compress/zstd_compress_sequences.c | 21 +- - lib/zstd/compress/zstd_compress_sequences.h | 16 +- - lib/zstd/compress/zstd_compress_superblock.c | 394 +- - lib/zstd/compress/zstd_compress_superblock.h | 3 +- - lib/zstd/compress/zstd_cwksp.h | 222 +- - lib/zstd/compress/zstd_double_fast.c | 245 +- - lib/zstd/compress/zstd_double_fast.h | 27 +- - lib/zstd/compress/zstd_fast.c | 703 +++- - lib/zstd/compress/zstd_fast.h | 16 +- - lib/zstd/compress/zstd_lazy.c | 840 +++-- - lib/zstd/compress/zstd_lazy.h | 195 +- - lib/zstd/compress/zstd_ldm.c | 102 +- - lib/zstd/compress/zstd_ldm.h | 17 +- - lib/zstd/compress/zstd_ldm_geartab.h | 3 +- - lib/zstd/compress/zstd_opt.c | 571 +-- - lib/zstd/compress/zstd_opt.h | 55 +- - lib/zstd/compress/zstd_preSplit.c | 239 ++ - lib/zstd/compress/zstd_preSplit.h | 34 + - lib/zstd/decompress/huf_decompress.c | 887 +++-- - lib/zstd/decompress/zstd_ddict.c | 
9 +- - lib/zstd/decompress/zstd_ddict.h | 3 +- - lib/zstd/decompress/zstd_decompress.c | 375 +- - lib/zstd/decompress/zstd_decompress_block.c | 724 ++-- - lib/zstd/decompress/zstd_decompress_block.h | 10 +- - .../decompress/zstd_decompress_internal.h | 19 +- - lib/zstd/decompress_sources.h | 2 +- - lib/zstd/zstd_common_module.c | 5 +- - lib/zstd/zstd_compress_module.c | 75 +- - lib/zstd/zstd_decompress_module.c | 4 +- - 60 files changed, 8746 insertions(+), 4379 deletions(-) - create mode 100644 lib/zstd/common/allocations.h - create mode 100644 lib/zstd/common/bits.h - create mode 100644 lib/zstd/compress/zstd_preSplit.c - create mode 100644 lib/zstd/compress/zstd_preSplit.h - ---- a/include/linux/zstd.h -+++ b/include/linux/zstd.h -@@ -1,6 +1,6 @@ - /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -160,6 +160,19 @@ typedef ZSTD_parameters zstd_parameters; - zstd_parameters zstd_get_params(int level, - unsigned long long estimated_src_size); - -+typedef ZSTD_CCtx zstd_cctx; -+typedef ZSTD_cParameter zstd_cparameter; -+ -+/** -+ * zstd_cctx_set_param() - sets a compression parameter -+ * @cctx: The context. Must have been initialized with zstd_init_cctx(). -+ * @param: The parameter to set. -+ * @value: The value to set the parameter to. -+ * -+ * Return: Zero or an error, which can be checked using zstd_is_error(). -+ */ -+size_t zstd_cctx_set_param(zstd_cctx *cctx, zstd_cparameter param, int value); -+ - - /** - * zstd_get_cparams() - returns zstd_compression_parameters for selected level -@@ -175,8 +188,6 @@ zstd_compression_parameters zstd_get_cpa - - /* ====== Single-pass Compression ====== */ - --typedef ZSTD_CCtx zstd_cctx; -- - /** - * zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx - * @parameters: The compression parameters to be used. -@@ -191,6 +202,20 @@ typedef ZSTD_CCtx zstd_cctx; - size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters); - - /** -+ * zstd_cctx_workspace_bound_with_ext_seq_prod() - max memory needed to -+ * initialize a zstd_cctx when using the block-level external sequence -+ * producer API. -+ * @parameters: The compression parameters to be used. -+ * -+ * If multiple compression parameters might be used, the caller must call -+ * this function for each set of parameters and use the maximum size. -+ * -+ * Return: A lower bound on the size of the workspace that is passed to -+ * zstd_init_cctx(). -+ */ -+size_t zstd_cctx_workspace_bound_with_ext_seq_prod(const zstd_compression_parameters *parameters); -+ -+/** - * zstd_init_cctx() - initialize a zstd compression context - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. -@@ -425,6 +450,16 @@ typedef ZSTD_CStream zstd_cstream; - size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams); - - /** -+ * zstd_cstream_workspace_bound_with_ext_seq_prod() - memory needed to initialize -+ * a zstd_cstream when using the block-level external sequence producer API. -+ * @cparams: The compression parameters to be used for compression. -+ * -+ * Return: A lower bound on the size of the workspace that is passed to -+ * zstd_init_cstream(). 
-+ */ -+size_t zstd_cstream_workspace_bound_with_ext_seq_prod(const zstd_compression_parameters *cparams); -+ -+/** - * zstd_init_cstream() - initialize a zstd streaming compression context - * @parameters The zstd parameters to use for compression. - * @pledged_src_size: If params.fParams.contentSizeFlag == 1 then the caller -@@ -584,6 +619,18 @@ size_t zstd_decompress_stream(zstd_dstre - size_t zstd_find_frame_compressed_size(const void *src, size_t src_size); - - /** -+ * zstd_register_sequence_producer() - exposes the zstd library function -+ * ZSTD_registerSequenceProducer(). This is used for the block-level external -+ * sequence producer API. See upstream zstd.h for detailed documentation. -+ */ -+typedef ZSTD_sequenceProducer_F zstd_sequence_producer_f; -+void zstd_register_sequence_producer( -+ zstd_cctx *cctx, -+ void* sequence_producer_state, -+ zstd_sequence_producer_f sequence_producer -+); -+ -+/** - * struct zstd_frame_params - zstd frame parameters stored in the frame header - * @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not - * present. -@@ -596,7 +643,7 @@ size_t zstd_find_frame_compressed_size(c - * - * See zstd_lib.h. - */ --typedef ZSTD_frameHeader zstd_frame_header; -+typedef ZSTD_FrameHeader zstd_frame_header; - - /** - * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame -@@ -611,4 +658,35 @@ typedef ZSTD_frameHeader zstd_frame_head - size_t zstd_get_frame_header(zstd_frame_header *params, const void *src, - size_t src_size); - -+/** -+ * struct zstd_sequence - a sequence of literals or a match -+ * -+ * @offset: The offset of the match -+ * @litLength: The literal length of the sequence -+ * @matchLength: The match length of the sequence -+ * @rep: Represents which repeat offset is used -+ */ -+typedef ZSTD_Sequence zstd_sequence; -+ -+/** -+ * zstd_compress_sequences_and_literals() - compress an array of zstd_sequence and literals -+ * -+ * @cctx: The zstd compression context. -+ * @dst: The buffer to compress the data into. -+ * @dst_capacity: The size of the destination buffer. -+ * @in_seqs: The array of zstd_sequence to compress. -+ * @in_seqs_size: The number of sequences in in_seqs. -+ * @literals: The literals associated to the sequences to be compressed. -+ * @lit_size: The size of the literals in the literals buffer. -+ * @lit_capacity: The size of the literals buffer. -+ * @decompressed_size: The size of the input data -+ * -+ * Return: The compressed size or an error, which can be checked using -+ * zstd_is_error(). -+ */ -+size_t zstd_compress_sequences_and_literals(zstd_cctx *cctx, void* dst, size_t dst_capacity, -+ const zstd_sequence *in_seqs, size_t in_seqs_size, -+ const void* literals, size_t lit_size, size_t lit_capacity, -+ size_t decompressed_size); -+ - #endif /* LINUX_ZSTD_H */ ---- a/include/linux/zstd_errors.h -+++ b/include/linux/zstd_errors.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -12,13 +13,18 @@ - #define ZSTD_ERRORS_H_398273423 - - --/*===== dependency =====*/ --#include /* size_t */ -+/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ -+#define ZSTDERRORLIB_VISIBLE - -+#ifndef ZSTDERRORLIB_HIDDEN -+# if (__GNUC__ >= 4) && !defined(__MINGW32__) -+# define ZSTDERRORLIB_HIDDEN __attribute__ ((visibility ("hidden"))) -+# else -+# define ZSTDERRORLIB_HIDDEN -+# endif -+#endif - --/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ --#define ZSTDERRORLIB_VISIBILITY --#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY -+#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE - - /*-********************************************* - * Error codes list -@@ -43,14 +49,18 @@ typedef enum { - ZSTD_error_frameParameter_windowTooLarge = 16, - ZSTD_error_corruption_detected = 20, - ZSTD_error_checksum_wrong = 22, -+ ZSTD_error_literals_headerWrong = 24, - ZSTD_error_dictionary_corrupted = 30, - ZSTD_error_dictionary_wrong = 32, - ZSTD_error_dictionaryCreation_failed = 34, - ZSTD_error_parameter_unsupported = 40, -+ ZSTD_error_parameter_combination_unsupported = 41, - ZSTD_error_parameter_outOfBound = 42, - ZSTD_error_tableLog_tooLarge = 44, - ZSTD_error_maxSymbolValue_tooLarge = 46, - ZSTD_error_maxSymbolValue_tooSmall = 48, -+ ZSTD_error_cannotProduce_uncompressedBlock = 49, -+ ZSTD_error_stabilityCondition_notRespected = 50, - ZSTD_error_stage_wrong = 60, - ZSTD_error_init_missing = 62, - ZSTD_error_memory_allocation = 64, -@@ -58,18 +68,18 @@ typedef enum { - ZSTD_error_dstSize_tooSmall = 70, - ZSTD_error_srcSize_wrong = 72, - ZSTD_error_dstBuffer_null = 74, -+ ZSTD_error_noForwardProgress_destFull = 80, -+ ZSTD_error_noForwardProgress_inputEmpty = 82, - /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ - ZSTD_error_frameIndex_tooLarge = 100, - ZSTD_error_seekableIO = 102, - ZSTD_error_dstBuffer_wrong = 104, - ZSTD_error_srcBuffer_wrong = 105, -+ ZSTD_error_sequenceProducer_failed = 106, -+ ZSTD_error_externalSequences_invalid = 107, - ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ - } ZSTD_ErrorCode; - --/*! ZSTD_getErrorCode() : -- convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, -- which can be used to compare with enum list published above */ --ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); - ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /*< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ - - ---- a/include/linux/zstd_lib.h -+++ b/include/linux/zstd_lib.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -11,23 +12,47 @@ - #ifndef ZSTD_H_235446 - #define ZSTD_H_235446 - --/* ====== Dependency ======*/ --#include /* INT_MAX */ -+ -+/* ====== Dependencies ======*/ - #include /* size_t */ - -+#include /* list of errors */ -+#if !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) -+#include /* INT_MAX */ -+#endif /* ZSTD_STATIC_LINKING_ONLY */ -+ - - /* ===== ZSTDLIB_API : control library symbols visibility ===== */ --#ifndef ZSTDLIB_VISIBLE -+#define ZSTDLIB_VISIBLE -+ -+#ifndef ZSTDLIB_HIDDEN - # if (__GNUC__ >= 4) && !defined(__MINGW32__) --# define ZSTDLIB_VISIBLE __attribute__ ((visibility ("default"))) - # define ZSTDLIB_HIDDEN __attribute__ ((visibility ("hidden"))) - # else --# define ZSTDLIB_VISIBLE - # define ZSTDLIB_HIDDEN - # endif - #endif -+ - #define ZSTDLIB_API ZSTDLIB_VISIBLE - -+/* Deprecation warnings : -+ * Should these warnings be a problem, it is generally possible to disable them, -+ * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual. -+ * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS. -+ */ -+#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS -+# define ZSTD_DEPRECATED(message) /* disable deprecation warnings */ -+#else -+# if (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__) || defined(__IAR_SYSTEMS_ICC__) -+# define ZSTD_DEPRECATED(message) __attribute__((deprecated(message))) -+# elif (__GNUC__ >= 3) -+# define ZSTD_DEPRECATED(message) __attribute__((deprecated)) -+# else -+# pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler") -+# define ZSTD_DEPRECATED(message) -+# endif -+#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */ -+ - - /* ***************************************************************************** - Introduction -@@ -65,7 +90,7 @@ - /*------ Version ------*/ - #define ZSTD_VERSION_MAJOR 1 - #define ZSTD_VERSION_MINOR 5 --#define ZSTD_VERSION_RELEASE 2 -+#define ZSTD_VERSION_RELEASE 7 - #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) - - /*! ZSTD_versionNumber() : -@@ -103,11 +128,12 @@ ZSTDLIB_API const char* ZSTD_versionStri - - - /* ************************************* --* Simple API -+* Simple Core API - ***************************************/ - /*! ZSTD_compress() : - * Compresses `src` content as a single zstd compressed frame into already allocated `dst`. -- * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. -+ * NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have -+ * enough space to successfully compress the data. - * @return : compressed size written into `dst` (<= `dstCapacity), - * or an error code if it fails (which can be tested using ZSTD_isError()). */ - ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, -@@ -115,47 +141,55 @@ ZSTDLIB_API size_t ZSTD_compress( void* - int compressionLevel); - - /*! ZSTD_decompress() : -- * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. -- * `dstCapacity` is an upper bound of originalSize to regenerate. -- * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. -- * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), -- * or an errorCode if it fails (which can be tested using ZSTD_isError()). 
*/ -+ * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. -+ * Multiple compressed frames can be decompressed at once with this method. -+ * The result will be the concatenation of all decompressed frames, back to back. -+ * `dstCapacity` is an upper bound of originalSize to regenerate. -+ * First frame's decompressed size can be extracted using ZSTD_getFrameContentSize(). -+ * If maximum upper bound isn't known, prefer using streaming mode to decompress data. -+ * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), -+ * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ - ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, - const void* src, size_t compressedSize); - -+ -+/*====== Decompression helper functions ======*/ -+ - /*! ZSTD_getFrameContentSize() : requires v1.3.0+ -- * `src` should point to the start of a ZSTD encoded frame. -- * `srcSize` must be at least as large as the frame header. -- * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. -- * @return : - decompressed size of `src` frame content, if known -- * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined -- * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) -- * note 1 : a 0 return value means the frame is valid but "empty". -- * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. -- * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. -- * In which case, it's necessary to use streaming mode to decompress data. -- * Optionally, application can rely on some implicit limit, -- * as ZSTD_decompress() only needs an upper bound of decompressed size. -- * (For example, data could be necessarily cut into blocks <= 16 KB). -- * note 3 : decompressed size is always present when compression is completed using single-pass functions, -- * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). -- * note 4 : decompressed size can be very large (64-bits value), -- * potentially larger than what local system can handle as a single memory segment. -- * In which case, it's necessary to use streaming mode to decompress data. -- * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. -- * Always ensure return value fits within application's authorized limits. -- * Each application can set its own limits. -- * note 6 : This function replaces ZSTD_getDecompressedSize() */ -+ * `src` should point to the start of a ZSTD encoded frame. -+ * `srcSize` must be at least as large as the frame header. -+ * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. -+ * @return : - decompressed size of `src` frame content, if known -+ * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined -+ * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) -+ * note 1 : a 0 return value means the frame is valid but "empty". -+ * When invoking this method on a skippable frame, it will return 0. -+ * note 2 : decompressed size is an optional field, it may not be present (typically in streaming mode). -+ * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. -+ * In which case, it's necessary to use streaming mode to decompress data. 
-+ * Optionally, application can rely on some implicit limit, -+ * as ZSTD_decompress() only needs an upper bound of decompressed size. -+ * (For example, data could be necessarily cut into blocks <= 16 KB). -+ * note 3 : decompressed size is always present when compression is completed using single-pass functions, -+ * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). -+ * note 4 : decompressed size can be very large (64-bits value), -+ * potentially larger than what local system can handle as a single memory segment. -+ * In which case, it's necessary to use streaming mode to decompress data. -+ * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. -+ * Always ensure return value fits within application's authorized limits. -+ * Each application can set its own limits. -+ * note 6 : This function replaces ZSTD_getDecompressedSize() */ - #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) - #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) - ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); - --/*! ZSTD_getDecompressedSize() : -- * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). -+/*! ZSTD_getDecompressedSize() (obsolete): -+ * This function is now obsolete, in favor of ZSTD_getFrameContentSize(). - * Both functions work the same way, but ZSTD_getDecompressedSize() blends - * "empty", "unknown" and "error" results to the same return value (0), - * while ZSTD_getFrameContentSize() gives them separate return values. - * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ -+ZSTD_DEPRECATED("Replaced by ZSTD_getFrameContentSize") - ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); - - /*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+ -@@ -163,18 +197,50 @@ ZSTDLIB_API unsigned long long ZSTD_getD - * `srcSize` must be >= first frame size - * @return : the compressed size of the first frame starting at `src`, - * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, -- * or an error code if input is invalid */ -+ * or an error code if input is invalid -+ * Note 1: this method is called _find*() because it's not enough to read the header, -+ * it may have to scan through the frame's content, to reach its end. -+ * Note 2: this method also works with Skippable Frames. In which case, -+ * it returns the size of the complete skippable frame, -+ * which is always equal to its content size + 8 bytes for headers. */ - ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); - - --/*====== Helper functions ======*/ --#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? 
(((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ --ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ --ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ --ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ --ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ --ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ --ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */ -+/*====== Compression helper functions ======*/ -+ -+/*! ZSTD_compressBound() : -+ * maximum compressed size in worst case single-pass scenario. -+ * When invoking `ZSTD_compress()`, or any other one-pass compression function, -+ * it's recommended to provide @dstCapacity >= ZSTD_compressBound(srcSize) -+ * as it eliminates one potential failure scenario, -+ * aka not enough room in dst buffer to write the compressed frame. -+ * Note : ZSTD_compressBound() itself can fail, if @srcSize >= ZSTD_MAX_INPUT_SIZE . -+ * In which case, ZSTD_compressBound() will return an error code -+ * which can be tested using ZSTD_isError(). -+ * -+ * ZSTD_COMPRESSBOUND() : -+ * same as ZSTD_compressBound(), but as a macro. -+ * It can be used to produce constants, which can be useful for static allocation, -+ * for example to size a static array on stack. -+ * Will produce constant value 0 if srcSize is too large. -+ */ -+#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00ULL : 0xFF00FF00U) -+#define ZSTD_COMPRESSBOUND(srcSize) (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ -+ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ -+ -+ -+/*====== Error helper functions ======*/ -+/* ZSTD_isError() : -+ * Most ZSTD_* functions returning a size_t value can be tested for error, -+ * using ZSTD_isError(). -+ * @return 1 if error, 0 otherwise -+ */ -+ZSTDLIB_API unsigned ZSTD_isError(size_t result); /*!< tells if a `size_t` function result is an error code */ -+ZSTDLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); /* convert a result into an error code, which can be compared to error enum list */ -+ZSTDLIB_API const char* ZSTD_getErrorName(size_t result); /*!< provides readable string from a function result */ -+ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ -+ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ -+ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */ - - - /* ************************************* -@@ -182,25 +248,25 @@ ZSTDLIB_API int ZSTD_defaultCLev - ***************************************/ - /*= Compression context - * When compressing many times, -- * it is recommended to allocate a context just once, -- * and re-use it for each successive compression operation. 
-- * This will make workload friendlier for system's memory. -+ * it is recommended to allocate a compression context just once, -+ * and reuse it for each successive compression operation. -+ * This will make the workload easier for system's memory. - * Note : re-using context is just a speed / resource optimization. - * It doesn't change the compression ratio, which remains identical. -- * Note 2 : In multi-threaded environments, -- * use one different context per thread for parallel execution. -+ * Note 2: For parallel execution in multi-threaded environments, -+ * use one different context per thread . - */ - typedef struct ZSTD_CCtx_s ZSTD_CCtx; - ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); --ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer */ -+ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* compatible with NULL pointer */ - - /*! ZSTD_compressCCtx() : - * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. -- * Important : in order to behave similarly to `ZSTD_compress()`, -- * this function compresses at requested compression level, -- * __ignoring any other parameter__ . -+ * Important : in order to mirror `ZSTD_compress()` behavior, -+ * this function compresses at the requested compression level, -+ * __ignoring any other advanced parameter__ . - * If any advanced parameter was set using the advanced API, -- * they will all be reset. Only `compressionLevel` remains. -+ * they will all be reset. Only @compressionLevel remains. - */ - ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, -@@ -210,7 +276,7 @@ ZSTDLIB_API size_t ZSTD_compressCCtx(ZST - /*= Decompression context - * When decompressing many times, - * it is recommended to allocate a context only once, -- * and re-use it for each successive compression operation. -+ * and reuse it for each successive compression operation. - * This will make workload friendlier for system's memory. - * Use one context per thread for parallel execution. */ - typedef struct ZSTD_DCtx_s ZSTD_DCtx; -@@ -220,7 +286,7 @@ ZSTDLIB_API size_t ZSTD_freeDCtx(ZST - /*! ZSTD_decompressDCtx() : - * Same as ZSTD_decompress(), - * requires an allocated ZSTD_DCtx. -- * Compatible with sticky parameters. -+ * Compatible with sticky parameters (see below). - */ - ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, -@@ -236,12 +302,12 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(Z - * using ZSTD_CCtx_set*() functions. - * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. - * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! -- * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . -+ * __They do not apply to one-shot variants such as ZSTD_compressCCtx()__ . - * - * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). - * - * This API supersedes all other "advanced" API entry points in the experimental section. -- * In the future, we expect to remove from experimental API entry points which are redundant with this API. -+ * In the future, we expect to remove API entry points from experimental which are redundant with this API. - */ - - -@@ -324,6 +390,19 @@ typedef enum { - * The higher the value of selected strategy, the more complex it is, - * resulting in stronger and slower compression. - * Special: value 0 means "use default strategy". 
*/ -+ -+ ZSTD_c_targetCBlockSize=130, /* v1.5.6+ -+ * Attempts to fit compressed block size into approximately targetCBlockSize. -+ * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX. -+ * Note that it's not a guarantee, just a convergence target (default:0). -+ * No target when targetCBlockSize == 0. -+ * This is helpful in low bandwidth streaming environments to improve end-to-end latency, -+ * when a client can make use of partial documents (a prominent example being Chrome). -+ * Note: this parameter is stable since v1.5.6. -+ * It was present as an experimental parameter in earlier versions, -+ * but it's not recommended using it with earlier library versions -+ * due to massive performance regressions. -+ */ - /* LDM mode parameters */ - ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. - * This parameter is designed to improve compression ratio -@@ -403,15 +482,18 @@ typedef enum { - * ZSTD_c_forceMaxWindow - * ZSTD_c_forceAttachDict - * ZSTD_c_literalCompressionMode -- * ZSTD_c_targetCBlockSize - * ZSTD_c_srcSizeHint - * ZSTD_c_enableDedicatedDictSearch - * ZSTD_c_stableInBuffer - * ZSTD_c_stableOutBuffer - * ZSTD_c_blockDelimiters - * ZSTD_c_validateSequences -- * ZSTD_c_useBlockSplitter -+ * ZSTD_c_blockSplitterLevel -+ * ZSTD_c_splitAfterSequences - * ZSTD_c_useRowMatchFinder -+ * ZSTD_c_prefetchCDictTables -+ * ZSTD_c_enableSeqProducerFallback -+ * ZSTD_c_maxBlockSize - * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. - * note : never ever use experimentalParam? names directly; - * also, the enums values themselves are unstable and can still change. -@@ -421,7 +503,7 @@ typedef enum { - ZSTD_c_experimentalParam3=1000, - ZSTD_c_experimentalParam4=1001, - ZSTD_c_experimentalParam5=1002, -- ZSTD_c_experimentalParam6=1003, -+ /* was ZSTD_c_experimentalParam6=1003; is now ZSTD_c_targetCBlockSize */ - ZSTD_c_experimentalParam7=1004, - ZSTD_c_experimentalParam8=1005, - ZSTD_c_experimentalParam9=1006, -@@ -430,7 +512,12 @@ typedef enum { - ZSTD_c_experimentalParam12=1009, - ZSTD_c_experimentalParam13=1010, - ZSTD_c_experimentalParam14=1011, -- ZSTD_c_experimentalParam15=1012 -+ ZSTD_c_experimentalParam15=1012, -+ ZSTD_c_experimentalParam16=1013, -+ ZSTD_c_experimentalParam17=1014, -+ ZSTD_c_experimentalParam18=1015, -+ ZSTD_c_experimentalParam19=1016, -+ ZSTD_c_experimentalParam20=1017 - } ZSTD_cParameter; - - typedef struct { -@@ -493,7 +580,7 @@ typedef enum { - * They will be used to compress next frame. - * Resetting session never fails. - * - The parameters : changes all parameters back to "default". -- * This removes any reference to any dictionary too. -+ * This also removes any reference to any dictionary or external sequence producer. - * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) - * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) - * - Both : similar to resetting the session, followed by resetting parameters. -@@ -502,11 +589,13 @@ ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_ - - /*! ZSTD_compress2() : - * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. -+ * (note that this entry point doesn't even expose a compression level parameter). - * ZSTD_compress2() always starts a new frame. - * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. 
- * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() - * - The function is always blocking, returns when compression is completed. -- * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. -+ * NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have -+ * enough space to successfully compress the data, though it is possible it fails for other reasons. - * @return : compressed size written into `dst` (<= `dstCapacity), - * or an error code if it fails (which can be tested using ZSTD_isError()). - */ -@@ -543,13 +632,17 @@ typedef enum { - * ZSTD_d_stableOutBuffer - * ZSTD_d_forceIgnoreChecksum - * ZSTD_d_refMultipleDDicts -+ * ZSTD_d_disableHuffmanAssembly -+ * ZSTD_d_maxBlockSize - * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. - * note : never ever use experimentalParam? names directly - */ - ZSTD_d_experimentalParam1=1000, - ZSTD_d_experimentalParam2=1001, - ZSTD_d_experimentalParam3=1002, -- ZSTD_d_experimentalParam4=1003 -+ ZSTD_d_experimentalParam4=1003, -+ ZSTD_d_experimentalParam5=1004, -+ ZSTD_d_experimentalParam6=1005 - - } ZSTD_dParameter; - -@@ -604,14 +697,14 @@ typedef struct ZSTD_outBuffer_s { - * A ZSTD_CStream object is required to track streaming operation. - * Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. - * ZSTD_CStream objects can be reused multiple times on consecutive compression operations. --* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. -+* It is recommended to reuse ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. - * - * For parallel execution, use one separate ZSTD_CStream per thread. - * - * note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. - * - * Parameters are sticky : when starting a new compression on the same context, --* it will re-use the same sticky parameters as previous compression session. -+* it will reuse the same sticky parameters as previous compression session. - * When in doubt, it's recommended to fully initialize the context before usage. - * Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), - * ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to -@@ -700,6 +793,11 @@ typedef enum { - * only ZSTD_e_end or ZSTD_e_flush operations are allowed. - * Before starting a new compression job, or changing compression parameters, - * it is required to fully flush internal buffers. -+ * - note: if an operation ends with an error, it may leave @cctx in an undefined state. -+ * Therefore, it's UB to invoke ZSTD_compressStream2() of ZSTD_compressStream() on such a state. -+ * In order to be re-employed after an error, a state must be reset, -+ * which can be done explicitly (ZSTD_CCtx_reset()), -+ * or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx()) - */ - ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, - ZSTD_outBuffer* output, -@@ -728,8 +826,6 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(v - * This following is a legacy streaming API, available since v1.0+ . - * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). - * It is redundant, but remains fully supported. -- * Streaming in combination with advanced parameters and dictionary compression -- * can only be used through the new API. 
- ******************************************************************************/ - - /*! -@@ -738,6 +834,9 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(v - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) - * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); -+ * -+ * Note that ZSTD_initCStream() clears any previously set dictionary. Use the new API -+ * to compress with a dictionary. - */ - ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); - /*! -@@ -758,7 +857,7 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_C - * - * A ZSTD_DStream object is required to track streaming operations. - * Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. --* ZSTD_DStream objects can be re-used multiple times. -+* ZSTD_DStream objects can be re-employed multiple times. - * - * Use ZSTD_initDStream() to start a new decompression operation. - * @return : recommended first input size -@@ -768,16 +867,21 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_C - * The function will update both `pos` fields. - * If `input.pos < input.size`, some input has not been consumed. - * It's up to the caller to present again remaining data. -+* - * The function tries to flush all data decoded immediately, respecting output buffer size. - * If `output.pos < output.size`, decoder has flushed everything it could. --* But if `output.pos == output.size`, there might be some data left within internal buffers., -+* -+* However, when `output.pos == output.size`, it's more difficult to know. -+* If @return > 0, the frame is not complete, meaning -+* either there is still some data left to flush within internal buffers, -+* or there is more input to read to complete the frame (or both). - * In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. - * Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. - * @return : 0 when a frame is completely decoded and fully flushed, - * or an error code, which can be tested using ZSTD_isError(), - * or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : - * the return value is a suggested next input size (just a hint for better latency) --* that will never request more than the remaining frame size. -+* that will never request more than the remaining content of the compressed frame. - * *******************************************************************************/ - - typedef ZSTD_DCtx ZSTD_DStream; /*< DCtx and DStream are now effectively same object (>= v1.3.0) */ -@@ -788,13 +892,38 @@ ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD - - /*===== Streaming decompression functions =====*/ - --/* This function is redundant with the advanced API and equivalent to: -+/*! ZSTD_initDStream() : -+ * Initialize/reset DStream state for new decompression operation. -+ * Call before new decompression operation using same DStream. - * -+ * Note : This function is redundant with the advanced API and equivalent to: - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * ZSTD_DCtx_refDDict(zds, NULL); - */ - ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); - -+/*! ZSTD_decompressStream() : -+ * Streaming decompression function. -+ * Call repetitively to consume full input updating it as necessary. 
-+ * Function will update both input and output `pos` fields exposing current state via these fields: -+ * - `input.pos < input.size`, some input remaining and caller should provide remaining input -+ * on the next call. -+ * - `output.pos < output.size`, decoder flushed internal output buffer. -+ * - `output.pos == output.size`, unflushed data potentially present in the internal buffers, -+ * check ZSTD_decompressStream() @return value, -+ * if > 0, invoke it again to flush remaining data to output. -+ * Note : with no additional input, amount of data flushed <= ZSTD_BLOCKSIZE_MAX. -+ * -+ * @return : 0 when a frame is completely decoded and fully flushed, -+ * or an error code, which can be tested using ZSTD_isError(), -+ * or any other value > 0, which means there is some decoding or flushing to do to complete current frame. -+ * -+ * Note: when an operation returns with an error code, the @zds state may be left in undefined state. -+ * It's UB to invoke `ZSTD_decompressStream()` on such a state. -+ * In order to re-use such a state, it must be first reset, -+ * which can be done explicitly (`ZSTD_DCtx_reset()`), -+ * or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`) -+ */ - ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); - - ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ -@@ -913,7 +1042,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_from - * If @return == 0, the dictID could not be decoded. - * This could for one of the following reasons : - * - The frame does not require a dictionary to be decoded (most common case). -- * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. -+ * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden piece of information. - * Note : this use case also happens when using a non-conformant dictionary. - * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). - * - This is not a Zstandard frame. -@@ -925,9 +1054,11 @@ ZSTDLIB_API unsigned ZSTD_getDictID_from - * Advanced dictionary and prefix API (Requires v1.4.0+) - * - * This API allows dictionaries to be used with ZSTD_compress2(), -- * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). Dictionaries are sticky, and -- * only reset with the context is reset with ZSTD_reset_parameters or -- * ZSTD_reset_session_and_parameters. Prefixes are single-use. -+ * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). -+ * Dictionaries are sticky, they remain valid when same context is reused, -+ * they only reset when the context is reset -+ * with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters. -+ * In contrast, Prefixes are single-use. - ******************************************************************************/ - - -@@ -937,8 +1068,9 @@ ZSTDLIB_API unsigned ZSTD_getDictID_from - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, - * meaning "return to no-dictionary mode". -- * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. -- * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). 
-+ * Note 1 : Dictionary is sticky, it will be used for all future compressed frames, -+ * until parameters are reset, a new dictionary is loaded, or the dictionary -+ * is explicitly invalidated by loading a NULL dictionary. - * Note 2 : Loading a dictionary involves building tables. - * It's also a CPU consuming operation, with non-negligible impact on latency. - * Tables are dependent on compression parameters, and for this reason, -@@ -947,11 +1079,15 @@ ZSTDLIB_API unsigned ZSTD_getDictID_from - * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. - * In such a case, dictionary buffer must outlive its users. - * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() -- * to precisely select how dictionary content must be interpreted. */ -+ * to precisely select how dictionary content must be interpreted. -+ * Note 5 : This method does not benefit from LDM (long distance mode). -+ * If you want to employ LDM on some large dictionary content, -+ * prefer employing ZSTD_CCtx_refPrefix() described below. -+ */ - ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); - - /*! ZSTD_CCtx_refCDict() : Requires v1.4.0+ -- * Reference a prepared dictionary, to be used for all next compressed frames. -+ * Reference a prepared dictionary, to be used for all future compressed frames. - * Note that compression parameters are enforced from within CDict, - * and supersede any compression parameter previously set within CCtx. - * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. -@@ -970,6 +1106,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZS - * Decompression will need same prefix to properly regenerate data. - * Compressing with a prefix is similar in outcome as performing a diff and compressing it, - * but performs much faster, especially during decompression (compression speed is tunable with compression level). -+ * This method is compatible with LDM (long distance mode). - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary - * Note 1 : Prefix buffer is referenced. It **must** outlive compression. -@@ -986,9 +1123,9 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(Z - const void* prefix, size_t prefixSize); - - /*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+ -- * Create an internal DDict from dict buffer, -- * to be used to decompress next frames. -- * The dictionary remains valid for all future frames, until explicitly invalidated. -+ * Create an internal DDict from dict buffer, to be used to decompress all future frames. -+ * The dictionary remains valid for all future frames, until explicitly invalidated, or -+ * a new dictionary is loaded. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, - * meaning "return to no-dictionary mode". -@@ -1012,9 +1149,10 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDiction - * The memory for the table is allocated on the first call to refDDict, and can be - * freed with ZSTD_freeDCtx(). - * -+ * If called with ZSTD_d_refMultipleDDicts disabled (the default), only one dictionary -+ * will be managed, and referencing a dictionary effectively "discards" any previous one. -+ * - * @result : 0, or an error code (which can be tested with ZSTD_isError()). -- * Note 1 : Currently, only one dictionary can be managed. 
-- * Referencing a new dictionary effectively "discards" any previous one. - * Special: referencing a NULL DDict means "return to no-dictionary mode". - * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. - */ -@@ -1051,6 +1189,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DStream(c - ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); - ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); - -+ - #endif /* ZSTD_H_235446 */ - - -@@ -1066,29 +1205,12 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(con - #if !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) - #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY - -+ - /* This can be overridden externally to hide static symbols. */ - #ifndef ZSTDLIB_STATIC_API - #define ZSTDLIB_STATIC_API ZSTDLIB_VISIBLE - #endif - --/* Deprecation warnings : -- * Should these warnings be a problem, it is generally possible to disable them, -- * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual. -- * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS. -- */ --#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS --# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API /* disable deprecation warnings */ --#else --# if (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__) --# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated(message))) --# elif (__GNUC__ >= 3) --# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated)) --# else --# pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler") --# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API --# endif --#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */ -- - /* ************************************************************************************** - * experimental API (static linking only) - **************************************************************************************** -@@ -1123,6 +1245,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(con - #define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ - #define ZSTD_STRATEGY_MIN ZSTD_fast - #define ZSTD_STRATEGY_MAX ZSTD_btultra2 -+#define ZSTD_BLOCKSIZE_MAX_MIN (1 << 10) /* The minimum valid max blocksize. Maximum blocksizes smaller than this make compressBound() inaccurate. */ - - - #define ZSTD_OVERLAPLOG_MIN 0 -@@ -1146,7 +1269,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(con - #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) - - /* Advanced parameter bounds */ --#define ZSTD_TARGETCBLOCKSIZE_MIN 64 -+#define ZSTD_TARGETCBLOCKSIZE_MIN 1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */ - #define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX - #define ZSTD_SRCSIZEHINT_MIN 0 - #define ZSTD_SRCSIZEHINT_MAX INT_MAX -@@ -1188,7 +1311,7 @@ typedef struct { - * - * Note: This field is optional. ZSTD_generateSequences() will calculate the value of - * 'rep', but repeat offsets do not necessarily need to be calculated from an external -- * sequence provider's perspective. For example, ZSTD_compressSequences() does not -+ * sequence provider perspective. For example, ZSTD_compressSequences() does not - * use this 'rep' field at all (as of now). - */ - } ZSTD_Sequence; -@@ -1293,17 +1416,18 @@ typedef enum { - } ZSTD_literalCompressionMode_e; - - typedef enum { -- /* Note: This enum controls features which are conditionally beneficial. 
Zstd typically will make a final -- * decision on whether or not to enable the feature (ZSTD_ps_auto), but setting the switch to ZSTD_ps_enable -- * or ZSTD_ps_disable allow for a force enable/disable the feature. -+ /* Note: This enum controls features which are conditionally beneficial. -+ * Zstd can take a decision on whether or not to enable the feature (ZSTD_ps_auto), -+ * but setting the switch to ZSTD_ps_enable or ZSTD_ps_disable force enable/disable the feature. - */ - ZSTD_ps_auto = 0, /* Let the library automatically determine whether the feature shall be enabled */ - ZSTD_ps_enable = 1, /* Force-enable the feature */ - ZSTD_ps_disable = 2 /* Do not use the feature */ --} ZSTD_paramSwitch_e; -+} ZSTD_ParamSwitch_e; -+#define ZSTD_paramSwitch_e ZSTD_ParamSwitch_e /* old name */ - - /* ************************************* --* Frame size functions -+* Frame header and size functions - ***************************************/ - - /*! ZSTD_findDecompressedSize() : -@@ -1345,34 +1469,130 @@ ZSTDLIB_STATIC_API unsigned long long ZS - ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); - - /*! ZSTD_frameHeaderSize() : -- * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. -+ * srcSize must be large enough, aka >= ZSTD_FRAMEHEADERSIZE_PREFIX. - * @return : size of the Frame Header, - * or an error code (if srcSize is too small) */ - ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); - -+typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_FrameType_e; -+#define ZSTD_frameType_e ZSTD_FrameType_e /* old name */ -+typedef struct { -+ unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ -+ unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ -+ unsigned blockSizeMax; -+ ZSTD_FrameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ -+ unsigned headerSize; -+ unsigned dictID; /* for ZSTD_skippableFrame, contains the skippable magic variant [0-15] */ -+ unsigned checksumFlag; -+ unsigned _reserved1; -+ unsigned _reserved2; -+} ZSTD_FrameHeader; -+#define ZSTD_frameHeader ZSTD_FrameHeader /* old name */ -+ -+/*! ZSTD_getFrameHeader() : -+ * decode Frame Header into `zfhPtr`, or requires larger `srcSize`. -+ * @return : 0 => header is complete, `zfhPtr` is correctly filled, -+ * >0 => `srcSize` is too small, @return value is the wanted `srcSize` amount, `zfhPtr` is not filled, -+ * or an error code, which can be tested using ZSTD_isError() */ -+ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize); -+/*! ZSTD_getFrameHeader_advanced() : -+ * same as ZSTD_getFrameHeader(), -+ * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ -+ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); -+ -+/*! ZSTD_decompressionMargin() : -+ * Zstd supports in-place decompression, where the input and output buffers overlap. -+ * In this case, the output buffer must be at least (Margin + Output_Size) bytes large, -+ * and the input buffer must be at the end of the output buffer. 
-+ * -+ * _______________________ Output Buffer ________________________ -+ * | | -+ * | ____ Input Buffer ____| -+ * | | | -+ * v v v -+ * |---------------------------------------|-----------|----------| -+ * ^ ^ ^ -+ * |___________________ Output_Size ___________________|_ Margin _| -+ * -+ * NOTE: See also ZSTD_DECOMPRESSION_MARGIN(). -+ * NOTE: This applies only to single-pass decompression through ZSTD_decompress() or -+ * ZSTD_decompressDCtx(). -+ * NOTE: This function supports multi-frame input. -+ * -+ * @param src The compressed frame(s) -+ * @param srcSize The size of the compressed frame(s) -+ * @returns The decompression margin or an error that can be checked with ZSTD_isError(). -+ */ -+ZSTDLIB_STATIC_API size_t ZSTD_decompressionMargin(const void* src, size_t srcSize); -+ -+/*! ZSTD_DECOMPRESS_MARGIN() : -+ * Similar to ZSTD_decompressionMargin(), but instead of computing the margin from -+ * the compressed frame, compute it from the original size and the blockSizeLog. -+ * See ZSTD_decompressionMargin() for details. -+ * -+ * WARNING: This macro does not support multi-frame input, the input must be a single -+ * zstd frame. If you need that support use the function, or implement it yourself. -+ * -+ * @param originalSize The original uncompressed size of the data. -+ * @param blockSize The block size == MIN(windowSize, ZSTD_BLOCKSIZE_MAX). -+ * Unless you explicitly set the windowLog smaller than -+ * ZSTD_BLOCKSIZELOG_MAX you can just use ZSTD_BLOCKSIZE_MAX. -+ */ -+#define ZSTD_DECOMPRESSION_MARGIN(originalSize, blockSize) ((size_t)( \ -+ ZSTD_FRAMEHEADERSIZE_MAX /* Frame header */ + \ -+ 4 /* checksum */ + \ -+ ((originalSize) == 0 ? 0 : 3 * (((originalSize) + (blockSize) - 1) / blockSize)) /* 3 bytes per block */ + \ -+ (blockSize) /* One block of margin */ \ -+ )) -+ - typedef enum { -- ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ -- ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ --} ZSTD_sequenceFormat_e; -+ ZSTD_sf_noBlockDelimiters = 0, /* ZSTD_Sequence[] has no block delimiters, just sequences */ -+ ZSTD_sf_explicitBlockDelimiters = 1 /* ZSTD_Sequence[] contains explicit block delimiters */ -+} ZSTD_SequenceFormat_e; -+#define ZSTD_sequenceFormat_e ZSTD_SequenceFormat_e /* old name */ -+ -+/*! ZSTD_sequenceBound() : -+ * `srcSize` : size of the input buffer -+ * @return : upper-bound for the number of sequences that can be generated -+ * from a buffer of srcSize bytes -+ * -+ * note : returns number of sequences - to get bytes, multiply by sizeof(ZSTD_Sequence). -+ */ -+ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize); - - /*! ZSTD_generateSequences() : -- * Generate sequences using ZSTD_compress2, given a source buffer. -+ * WARNING: This function is meant for debugging and informational purposes ONLY! -+ * Its implementation is flawed, and it will be deleted in a future version. -+ * It is not guaranteed to succeed, as there are several cases where it will give -+ * up and fail. You should NOT use this function in production code. -+ * -+ * This function is deprecated, and will be removed in a future version. -+ * -+ * Generate sequences using ZSTD_compress2(), given a source buffer. -+ * -+ * @param zc The compression context to be used for ZSTD_compress2(). Set any -+ * compression parameters you need on this context. 
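
For reference, the in-place decompression contract introduced above (ZSTD_decompressionMargin() / ZSTD_DECOMPRESSION_MARGIN()) can be exercised with a helper along the following lines. This is only an illustrative sketch against the upstream-style zstd.h API (in-kernel callers go through the include/linux/zstd.h wrappers); the helper name, buffer layout and error sentinel are assumptions, not part of the patch:

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <string.h>

    /* `buf` is `bufCapacity` bytes large and currently holds `srcSize` compressed
     * bytes at its start; the frame is known to regenerate `dstSize` bytes. */
    static size_t decompress_in_place(void *buf, size_t bufCapacity,
                                      size_t srcSize, size_t dstSize)
    {
        size_t const margin = ZSTD_decompressionMargin(buf, srcSize);
        if (ZSTD_isError(margin))
            return margin;
        if (bufCapacity < dstSize + margin)
            return (size_t)-1;  /* assumed "buffer too small" sentinel */
        /* The input must sit at the end of the output buffer for in-place mode. */
        memmove((char *)buf + bufCapacity - srcSize, buf, srcSize);
        return ZSTD_decompress(buf, dstSize,
                               (const char *)buf + bufCapacity - srcSize, srcSize);
    }
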
-+ * @param outSeqs The output sequences buffer of size @p outSeqsSize -+ * @param outSeqsCapacity The size of the output sequences buffer. -+ * ZSTD_sequenceBound(srcSize) is an upper bound on the number -+ * of sequences that can be generated. -+ * @param src The source buffer to generate sequences from of size @p srcSize. -+ * @param srcSize The size of the source buffer. - * - * Each block will end with a dummy sequence - * with offset == 0, matchLength == 0, and litLength == length of last literals. - * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) - * simply acts as a block delimiter. - * -- * zc can be used to insert custom compression params. -- * This function invokes ZSTD_compress2 -- * -- * The output of this function can be fed into ZSTD_compressSequences() with CCtx -- * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters -- * @return : number of sequences generated -- */ -- --ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, -- size_t outSeqsSize, const void* src, size_t srcSize); -+ * @returns The number of sequences generated, necessarily less than -+ * ZSTD_sequenceBound(srcSize), or an error code that can be checked -+ * with ZSTD_isError(). -+ */ -+ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()") -+ZSTDLIB_STATIC_API size_t -+ZSTD_generateSequences(ZSTD_CCtx* zc, -+ ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, -+ const void* src, size_t srcSize); - - /*! ZSTD_mergeBlockDelimiters() : - * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals -@@ -1388,8 +1608,10 @@ ZSTDLIB_STATIC_API size_t ZSTD_generateS - ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); - - /*! ZSTD_compressSequences() : -- * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. -- * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.) -+ * Compress an array of ZSTD_Sequence, associated with @src buffer, into dst. -+ * @src contains the entire input (not just the literals). -+ * If @srcSize > sum(sequence.length), the remaining bytes are considered all literals -+ * If a dictionary is included, then the cctx should reference the dict (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.). - * The entire source is compressed into a single frame. - * - * The compression behavior changes based on cctx params. In particular: -@@ -1398,11 +1620,17 @@ ZSTDLIB_STATIC_API size_t ZSTD_mergeBloc - * the block size derived from the cctx, and sequences may be split. This is the default setting. - * - * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain -- * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. -+ * valid block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. - * -- * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined -- * behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for -- * specifics regarding offset/matchlength requirements) then the function will bail out and return an error. 
-+ * When ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, it's possible to decide generating repcodes -+ * using the advanced parameter ZSTD_c_repcodeResolution. Repcodes will improve compression ratio, though the benefit -+ * can vary greatly depending on Sequences. On the other hand, repcode resolution is an expensive operation. -+ * By default, it's disabled at low (<10) compression levels, and enabled above the threshold (>=10). -+ * ZSTD_c_repcodeResolution makes it possible to directly manage this processing in either direction. -+ * -+ * If ZSTD_c_validateSequences == 0, this function blindly accepts the Sequences provided. Invalid Sequences cause undefined -+ * behavior. If ZSTD_c_validateSequences == 1, then the function will detect invalid Sequences (see doc/zstd_compression_format.md for -+ * specifics regarding offset/matchlength requirements) and then bail out and return an error. - * - * In addition to the two adjustable experimental params, there are other important cctx params. - * - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN. -@@ -1410,14 +1638,42 @@ ZSTDLIB_STATIC_API size_t ZSTD_mergeBloc - * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset - * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md - * -- * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused. -- * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly, -- * and cannot emit an RLE block that disagrees with the repcode history -- * @return : final compressed size or a ZSTD error. -- */ --ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, -- const ZSTD_Sequence* inSeqs, size_t inSeqsSize, -- const void* src, size_t srcSize); -+ * Note: Repcodes are, as of now, always re-calculated within this function, ZSTD_Sequence.rep is effectively unused. -+ * Dev Note: Once ability to ingest repcodes become available, the explicit block delims mode must respect those repcodes exactly, -+ * and cannot emit an RLE block that disagrees with the repcode history. -+ * @return : final compressed size, or a ZSTD error code. -+ */ -+ZSTDLIB_STATIC_API size_t -+ZSTD_compressSequences(ZSTD_CCtx* cctx, -+ void* dst, size_t dstCapacity, -+ const ZSTD_Sequence* inSeqs, size_t inSeqsSize, -+ const void* src, size_t srcSize); -+ -+ -+/*! ZSTD_compressSequencesAndLiterals() : -+ * This is a variant of ZSTD_compressSequences() which, -+ * instead of receiving (src,srcSize) as input parameter, receives (literals,litSize), -+ * aka all the literals, already extracted and laid out into a single continuous buffer. -+ * This can be useful if the process generating the sequences also happens to generate the buffer of literals, -+ * thus skipping an extraction + caching stage. 
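
The interplay of ZSTD_c_blockDelimiters, ZSTD_c_validateSequences and ZSTD_c_repcodeResolution described above is easiest to see as a short call sequence. The sketch below is illustrative only (upstream-style API; the sequence array is assumed to come from an external match finder and to carry explicit block delimiters):

    static size_t compress_from_sequences(ZSTD_CCtx *cctx,
                                          void *dst, size_t dstCapacity,
                                          const ZSTD_Sequence *seqs, size_t nbSeqs,
                                          const void *src, size_t srcSize)
    {
        ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters,
                               ZSTD_sf_explicitBlockDelimiters);
        /* Reject malformed sequences instead of producing a corrupted frame. */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
        return ZSTD_compressSequences(cctx, dst, dstCapacity,
                                      seqs, nbSeqs, src, srcSize);
    }
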
-+ * It's a speed optimization, useful when the right conditions are met, -+ * but it also features the following limitations: -+ * - Only supports explicit delimiter mode -+ * - Currently does not support Sequences validation (so input Sequences are trusted) -+ * - Not compatible with frame checksum, which must be disabled -+ * - If any block is incompressible, will fail and return an error -+ * - @litSize must be == sum of all @.litLength fields in @inSeqs. Any discrepancy will generate an error. -+ * - @litBufCapacity is the size of the underlying buffer into which literals are written, starting at address @literals. -+ * @litBufCapacity must be at least 8 bytes larger than @litSize. -+ * - @decompressedSize must be correct, and correspond to the sum of all Sequences. Any discrepancy will generate an error. -+ * @return : final compressed size, or a ZSTD error code. -+ */ -+ZSTDLIB_STATIC_API size_t -+ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx, -+ void* dst, size_t dstCapacity, -+ const ZSTD_Sequence* inSeqs, size_t nbSequences, -+ const void* literals, size_t litSize, size_t litBufCapacity, -+ size_t decompressedSize); - - - /*! ZSTD_writeSkippableFrame() : -@@ -1425,8 +1681,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_compressS - * - * Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number, - * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. -- * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so -- * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. -+ * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, -+ * so the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. - * - * Returns an error if destination buffer is not large enough, if the source size is not representable - * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). -@@ -1434,26 +1690,28 @@ ZSTDLIB_STATIC_API size_t ZSTD_compressS - * @return : number of bytes written or a ZSTD error. - */ - ZSTDLIB_STATIC_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, -- const void* src, size_t srcSize, unsigned magicVariant); -+ const void* src, size_t srcSize, -+ unsigned magicVariant); - - /*! ZSTD_readSkippableFrame() : -- * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer. -+ * Retrieves the content of a zstd skippable frame starting at @src, and writes it to @dst buffer. - * -- * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written, -- * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested -- * in the magicVariant. -+ * The parameter @magicVariant will receive the magicVariant that was supplied when the frame was written, -+ * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. -+ * This can be NULL if the caller is not interested in the magicVariant. - * - * Returns an error if destination buffer is not large enough, or if the frame is not skippable. - * - * @return : number of bytes written or a ZSTD error. - */ --ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant, -- const void* src, size_t srcSize); -+ZSTDLIB_STATIC_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, -+ unsigned* magicVariant, -+ const void* src, size_t srcSize); - - /*! 
ZSTD_isSkippableFrame() : - * Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame. - */ --ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size); -+ZSTDLIB_STATIC_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size); - - - -@@ -1464,48 +1722,59 @@ ZSTDLIB_API unsigned ZSTD_isSkippableFra - /*! ZSTD_estimate*() : - * These functions make it possible to estimate memory usage - * of a future {D,C}Ctx, before its creation. -+ * This is useful in combination with ZSTD_initStatic(), -+ * which makes it possible to employ a static buffer for ZSTD_CCtx* state. - * - * ZSTD_estimateCCtxSize() will provide a memory budget large enough -- * for any compression level up to selected one. -- * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate -- * does not include space for a window buffer. -- * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. -+ * to compress data of any size using one-shot compression ZSTD_compressCCtx() or ZSTD_compress2() -+ * associated with any compression level up to max specified one. - * The estimate will assume the input may be arbitrarily large, - * which is the worst case. - * -+ * Note that the size estimation is specific for one-shot compression, -+ * it is not valid for streaming (see ZSTD_estimateCStreamSize*()) -+ * nor other potential ways of using a ZSTD_CCtx* state. -+ * - * When srcSize can be bound by a known and rather "small" value, -- * this fact can be used to provide a tighter estimation -- * because the CCtx compression context will need less memory. -- * This tighter estimation can be provided by more advanced functions -+ * this knowledge can be used to provide a tighter budget estimation -+ * because the ZSTD_CCtx* state will need less memory for small inputs. -+ * This tighter estimation can be provided by employing more advanced functions - * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), - * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). - * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. - * -- * Note 2 : only single-threaded compression is supported. -+ * Note : only single-threaded compression is supported. - * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. - */ --ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel); -+ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int maxCompressionLevel); - ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); - ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); - ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void); - - /*! ZSTD_estimateCStreamSize() : -- * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. -- * It will also consider src size to be arbitrarily "large", which is worst case. -+ * ZSTD_estimateCStreamSize() will provide a memory budget large enough for streaming compression -+ * using any compression level up to the max specified one. -+ * It will also consider src size to be arbitrarily "large", which is a worst case scenario. - * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. 
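
As a concrete illustration of the ZSTD_estimateCCtxSize() / ZSTD_initStaticCCtx() pairing described above, one-shot compression from a caller-provided workspace could look like the sketch below (illustrative only; the helper name, allocator and failure sentinel are assumptions):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <stdlib.h>

    static size_t compress_with_static_cctx(void *dst, size_t dstCapacity,
                                            const void *src, size_t srcSize,
                                            int level)
    {
        size_t const wkspSize = ZSTD_estimateCCtxSize(level); /* worst case for this level */
        void *wksp = malloc(wkspSize);                        /* any 8-byte aligned buffer works */
        ZSTD_CCtx *cctx = wksp ? ZSTD_initStaticCCtx(wksp, wkspSize) : NULL;
        size_t const ret = cctx
            ? ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, level)
            : (size_t)-1;                                     /* assumed allocation-failure sentinel */
        free(wksp);
        return ret;
    }
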
- * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. - * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. - * Note : CStream size estimation is only correct for single-threaded compression. -- * ZSTD_DStream memory budget depends on window Size. -+ * ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. -+ * Note 2 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time. -+ * Size estimates assume that no external sequence producer is registered. -+ * -+ * ZSTD_DStream memory budget depends on frame's window Size. - * This information can be passed manually, using ZSTD_estimateDStreamSize, - * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); -+ * Any frame requesting a window size larger than max specified one will be rejected. - * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), - * an internal ?Dict will be created, which additional size is not estimated here. -- * In this case, get total size by adding ZSTD_estimate?DictSize */ --ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel); -+ * In this case, get total size by adding ZSTD_estimate?DictSize -+ */ -+ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int maxCompressionLevel); - ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); - ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); --ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t windowSize); -+ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t maxWindowSize); - ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); - - /*! ZSTD_estimate?DictSize() : -@@ -1568,7 +1837,15 @@ typedef void (*ZSTD_freeFunction) (void - typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; - static - __attribute__((__unused__)) -+ -+#if defined(__clang__) && __clang_major__ >= 5 -+#pragma clang diagnostic push -+#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -+#endif - ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /*< this constant defers to stdlib's functions */ -+#if defined(__clang__) && __clang_major__ >= 5 -+#pragma clang diagnostic pop -+#endif - - ZSTDLIB_STATIC_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); - ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); -@@ -1649,22 +1926,45 @@ ZSTDLIB_STATIC_API size_t ZSTD_checkCPar - * This function never fails (wide contract) */ - ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); - -+/*! ZSTD_CCtx_setCParams() : -+ * Set all parameters provided within @p cparams into the working @p cctx. -+ * Note : if modifying parameters during compression (MT mode only), -+ * note that changes to the .windowLog parameter will be ignored. -+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()). -+ * On failure, no parameters are updated. 
-+ */ -+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams); -+ -+/*! ZSTD_CCtx_setFParams() : -+ * Set all parameters provided within @p fparams into the working @p cctx. -+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()). -+ */ -+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams); -+ -+/*! ZSTD_CCtx_setParams() : -+ * Set all parameters provided within @p params into the working @p cctx. -+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()). -+ */ -+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params); -+ - /*! ZSTD_compress_advanced() : - * Note : this function is now DEPRECATED. - * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. - * This prototype will generate compilation warnings. */ - ZSTD_DEPRECATED("use ZSTD_compress2") -+ZSTDLIB_STATIC_API - size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, -- void* dst, size_t dstCapacity, -- const void* src, size_t srcSize, -- const void* dict,size_t dictSize, -- ZSTD_parameters params); -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize, -+ const void* dict,size_t dictSize, -+ ZSTD_parameters params); - - /*! ZSTD_compress_usingCDict_advanced() : - * Note : this function is now DEPRECATED. - * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. - * This prototype will generate compilation warnings. */ - ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary") -+ZSTDLIB_STATIC_API - size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, -@@ -1725,7 +2025,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refP - * See the comments on that enum for an explanation of the feature. */ - #define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 - --/* Controlled with ZSTD_paramSwitch_e enum. -+/* Controlled with ZSTD_ParamSwitch_e enum. - * Default is ZSTD_ps_auto. - * Set to ZSTD_ps_disable to never compress literals. - * Set to ZSTD_ps_enable to always compress literals. (Note: uncompressed literals -@@ -1737,11 +2037,6 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refP - */ - #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 - --/* Tries to fit compressed block size to be around targetCBlockSize. -- * No target when targetCBlockSize == 0. -- * There is no guarantee on compressed block size (default:0) */ --#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 -- - /* User's best guess of source size. - * Hint is not valid when srcSizeHint == 0. - * There is no guarantee that hint is close to actual source size, -@@ -1808,13 +2103,16 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refP - * Experimental parameter. - * Default is 0 == disabled. Set to 1 to enable. - * -- * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same -- * between calls, except for the modifications that zstd makes to pos (the -- * caller must not modify pos). This is checked by the compressor, and -- * compression will fail if it ever changes. This means the only flush -- * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end -- * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos) -- * MUST not be modified during compression or you will get data corruption. 
-+ * Tells the compressor that input data presented with ZSTD_inBuffer -+ * will ALWAYS be the same between calls. -+ * Technically, the @src pointer must never be changed, -+ * and the @pos field can only be updated by zstd. -+ * However, it's possible to increase the @size field, -+ * allowing scenarios where more data can be appended after compressions starts. -+ * These conditions are checked by the compressor, -+ * and compression will fail if they are not respected. -+ * Also, data in the ZSTD_inBuffer within the range [src, src + pos) -+ * MUST not be modified during compression or it will result in data corruption. - * - * When this flag is enabled zstd won't allocate an input window buffer, - * because the user guarantees it can reference the ZSTD_inBuffer until -@@ -1822,18 +2120,15 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refP - * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also - * avoid the memcpy() from the input buffer to the input window buffer. - * -- * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used. -- * That means this flag cannot be used with ZSTD_compressStream(). -- * - * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using - * this flag is ALWAYS memory safe, and will never access out-of-bounds -- * memory. However, compression WILL fail if you violate the preconditions. -+ * memory. However, compression WILL fail if conditions are not respected. - * -- * WARNING: The data in the ZSTD_inBuffer in the range [dst, dst + pos) MUST -- * not be modified during compression or you will get data corruption. This -- * is because zstd needs to reference data in the ZSTD_inBuffer to find -+ * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST -+ * not be modified during compression or it will result in data corruption. -+ * This is because zstd needs to reference data in the ZSTD_inBuffer to find - * matches. Normally zstd maintains its own window buffer for this purpose, -- * but passing this flag tells zstd to use the user provided buffer. -+ * but passing this flag tells zstd to rely on user provided buffer instead. - */ - #define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9 - -@@ -1871,22 +2166,46 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refP - /* ZSTD_c_validateSequences - * Default is 0 == disabled. Set to 1 to enable sequence validation. - * -- * For use with sequence compression API: ZSTD_compressSequences(). -- * Designates whether or not we validate sequences provided to ZSTD_compressSequences() -+ * For use with sequence compression API: ZSTD_compressSequences*(). -+ * Designates whether or not provided sequences are validated within ZSTD_compressSequences*() - * during function execution. - * -- * Without validation, providing a sequence that does not conform to the zstd spec will cause -- * undefined behavior, and may produce a corrupted block. -+ * When Sequence validation is disabled (default), Sequences are compressed as-is, -+ * so they must correct, otherwise it would result in a corruption error. - * -- * With validation enabled, a if sequence is invalid (see doc/zstd_compression_format.md for -+ * Sequence validation adds some protection, by ensuring that all values respect boundary conditions. -+ * If a Sequence is detected invalid (see doc/zstd_compression_format.md for - * specifics regarding offset/matchlength requirements) then the function will bail out and - * return an error. 
-- * - */ - #define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 - --/* ZSTD_c_useBlockSplitter -- * Controlled with ZSTD_paramSwitch_e enum. -+/* ZSTD_c_blockSplitterLevel -+ * note: this parameter only influences the first splitter stage, -+ * which is active before producing the sequences. -+ * ZSTD_c_splitAfterSequences controls the next splitter stage, -+ * which is active after sequence production. -+ * Note that both can be combined. -+ * Allowed values are between 0 and ZSTD_BLOCKSPLITTER_LEVEL_MAX included. -+ * 0 means "auto", which will select a value depending on current ZSTD_c_strategy. -+ * 1 means no splitting. -+ * Then, values from 2 to 6 are sorted in increasing cpu load order. -+ * -+ * Note that currently the first block is never split, -+ * to ensure expansion guarantees in presence of incompressible data. -+ */ -+#define ZSTD_BLOCKSPLITTER_LEVEL_MAX 6 -+#define ZSTD_c_blockSplitterLevel ZSTD_c_experimentalParam20 -+ -+/* ZSTD_c_splitAfterSequences -+ * This is a stronger splitter algorithm, -+ * based on actual sequences previously produced by the selected parser. -+ * It's also slower, and as a consequence, mostly used for high compression levels. -+ * While the post-splitter does overlap with the pre-splitter, -+ * both can nonetheless be combined, -+ * notably with ZSTD_c_blockSplitterLevel at ZSTD_BLOCKSPLITTER_LEVEL_MAX, -+ * resulting in higher compression ratio than just one of them. -+ * - * Default is ZSTD_ps_auto. - * Set to ZSTD_ps_disable to never use block splitter. - * Set to ZSTD_ps_enable to always use block splitter. -@@ -1894,10 +2213,10 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refP - * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use - * block splitting based on the compression parameters. - */ --#define ZSTD_c_useBlockSplitter ZSTD_c_experimentalParam13 -+#define ZSTD_c_splitAfterSequences ZSTD_c_experimentalParam13 - - /* ZSTD_c_useRowMatchFinder -- * Controlled with ZSTD_paramSwitch_e enum. -+ * Controlled with ZSTD_ParamSwitch_e enum. - * Default is ZSTD_ps_auto. - * Set to ZSTD_ps_disable to never use row-based matchfinder. - * Set to ZSTD_ps_enable to force usage of row-based matchfinder. -@@ -1928,6 +2247,80 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refP - */ - #define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15 - -+/* ZSTD_c_prefetchCDictTables -+ * Controlled with ZSTD_ParamSwitch_e enum. Default is ZSTD_ps_auto. -+ * -+ * In some situations, zstd uses CDict tables in-place rather than copying them -+ * into the working context. (See docs on ZSTD_dictAttachPref_e above for details). -+ * In such situations, compression speed is seriously impacted when CDict tables are -+ * "cold" (outside CPU cache). This parameter instructs zstd to prefetch CDict tables -+ * when they are used in-place. -+ * -+ * For sufficiently small inputs, the cost of the prefetch will outweigh the benefit. -+ * For sufficiently large inputs, zstd will by default memcpy() CDict tables -+ * into the working context, so there is no need to prefetch. This parameter is -+ * targeted at a middle range of input sizes, where a prefetch is cheap enough to be -+ * useful but memcpy() is too expensive. The exact range of input sizes where this -+ * makes sense is best determined by careful experimentation. -+ * -+ * Note: for this parameter, ZSTD_ps_auto is currently equivalent to ZSTD_ps_disable, -+ * but in the future zstd may conditionally enable this feature via an auto-detection -+ * heuristic for cold CDicts. 
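
A compact illustration of how these ZSTD_ParamSwitch_e-controlled knobs are driven in practice; the chosen values are purely illustrative and whether they help is workload dependent (requires the zstd version imported by this patch, with ZSTD_STATIC_LINKING_ONLY defined):

    static void tune_block_splitting(ZSTD_CCtx *cctx)
    {
        /* Both splitter stages may be combined; see the parameter docs above. */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockSplitterLevel, 3);               /* pre-sequence splitter */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_splitAfterSequences, ZSTD_ps_enable); /* post-sequence splitter */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_useRowMatchFinder, ZSTD_ps_auto);     /* let the library decide */
    }
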
-+ * Use ZSTD_ps_disable to opt out of prefetching under any circumstances. -+ */ -+#define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16 -+ -+/* ZSTD_c_enableSeqProducerFallback -+ * Allowed values are 0 (disable) and 1 (enable). The default setting is 0. -+ * -+ * Controls whether zstd will fall back to an internal sequence producer if an -+ * external sequence producer is registered and returns an error code. This fallback -+ * is block-by-block: the internal sequence producer will only be called for blocks -+ * where the external sequence producer returns an error code. Fallback parsing will -+ * follow any other cParam settings, such as compression level, the same as in a -+ * normal (fully-internal) compression operation. -+ * -+ * The user is strongly encouraged to read the full Block-Level Sequence Producer API -+ * documentation (below) before setting this parameter. */ -+#define ZSTD_c_enableSeqProducerFallback ZSTD_c_experimentalParam17 -+ -+/* ZSTD_c_maxBlockSize -+ * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB). -+ * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default. -+ * -+ * This parameter can be used to set an upper bound on the blocksize -+ * that overrides the default ZSTD_BLOCKSIZE_MAX. It cannot be used to set upper -+ * bounds greater than ZSTD_BLOCKSIZE_MAX or bounds lower than 1KB (will make -+ * compressBound() inaccurate). Only currently meant to be used for testing. -+ */ -+#define ZSTD_c_maxBlockSize ZSTD_c_experimentalParam18 -+ -+/* ZSTD_c_repcodeResolution -+ * This parameter only has an effect if ZSTD_c_blockDelimiters is -+ * set to ZSTD_sf_explicitBlockDelimiters (may change in the future). -+ * -+ * This parameter affects how zstd parses external sequences, -+ * provided via the ZSTD_compressSequences*() API -+ * or from an external block-level sequence producer. -+ * -+ * If set to ZSTD_ps_enable, the library will check for repeated offsets within -+ * external sequences, even if those repcodes are not explicitly indicated in -+ * the "rep" field. Note that this is the only way to exploit repcode matches -+ * while using compressSequences*() or an external sequence producer, since zstd -+ * currently ignores the "rep" field of external sequences. -+ * -+ * If set to ZSTD_ps_disable, the library will not exploit repeated offsets in -+ * external sequences, regardless of whether the "rep" field has been set. This -+ * reduces sequence compression overhead by about 25% while sacrificing some -+ * compression ratio. -+ * -+ * The default value is ZSTD_ps_auto, for which the library will enable/disable -+ * based on compression level (currently: level<10 disables, level>=10 enables). -+ */ -+#define ZSTD_c_repcodeResolution ZSTD_c_experimentalParam19 -+#define ZSTD_c_searchForExternalRepcodes ZSTD_c_experimentalParam19 /* older name */ -+ -+ - /*! ZSTD_CCtx_getParameter() : - * Get the requested compression parameter value, selected by enum ZSTD_cParameter, - * and store it into int* value. -@@ -2084,7 +2477,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getP - * in the range [dst, dst + pos) MUST not be modified during decompression - * or you will get data corruption. - * -- * When this flags is enabled zstd won't allocate an output buffer, because -+ * When this flag is enabled zstd won't allocate an output buffer, because - * it can write directly to the ZSTD_outBuffer, but it will still allocate - * an input buffer large enough to fit any compressed block. 
This will also - * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. -@@ -2137,6 +2530,33 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getP - */ - #define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 - -+/* ZSTD_d_disableHuffmanAssembly -+ * Set to 1 to disable the Huffman assembly implementation. -+ * The default value is 0, which allows zstd to use the Huffman assembly -+ * implementation if available. -+ * -+ * This parameter can be used to disable Huffman assembly at runtime. -+ * If you want to disable it at compile time you can define the macro -+ * ZSTD_DISABLE_ASM. -+ */ -+#define ZSTD_d_disableHuffmanAssembly ZSTD_d_experimentalParam5 -+ -+/* ZSTD_d_maxBlockSize -+ * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB). -+ * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default. -+ * -+ * Forces the decompressor to reject blocks whose content size is -+ * larger than the configured maxBlockSize. When maxBlockSize is -+ * larger than the windowSize, the windowSize is used instead. -+ * This saves memory on the decoder when you know all blocks are small. -+ * -+ * This option is typically used in conjunction with ZSTD_c_maxBlockSize. -+ * -+ * WARNING: This causes the decoder to reject otherwise valid frames -+ * that have block sizes larger than the configured maxBlockSize. -+ */ -+#define ZSTD_d_maxBlockSize ZSTD_d_experimentalParam6 -+ - - /*! ZSTD_DCtx_setFormat() : - * This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter(). -@@ -2145,6 +2565,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getP - * such ZSTD_f_zstd1_magicless for example. - * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ - ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead") -+ZSTDLIB_STATIC_API - size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); - - /*! ZSTD_decompressStream_simpleArgs() : -@@ -2181,6 +2602,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompres - * This prototype will generate compilation warnings. - */ - ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") -+ZSTDLIB_STATIC_API - size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, - int compressionLevel, - unsigned long long pledgedSrcSize); -@@ -2198,17 +2620,15 @@ size_t ZSTD_initCStream_srcSize(ZSTD_CSt - * This prototype will generate compilation warnings. - */ - ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") -+ZSTDLIB_STATIC_API - size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - int compressionLevel); - - /*! ZSTD_initCStream_advanced() : -- * This function is DEPRECATED, and is approximately equivalent to: -+ * This function is DEPRECATED, and is equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); -- * // Pseudocode: Set each zstd parameter and leave the rest as-is. -- * for ((param, value) : params) { -- * ZSTD_CCtx_setParameter(zcs, param, value); -- * } -+ * ZSTD_CCtx_setParams(zcs, params); - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); - * -@@ -2218,6 +2638,7 @@ size_t ZSTD_initCStream_usingDict(ZSTD_C - * This prototype will generate compilation warnings. 
- */ - ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") -+ZSTDLIB_STATIC_API - size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - ZSTD_parameters params, -@@ -2232,15 +2653,13 @@ size_t ZSTD_initCStream_advanced(ZSTD_CS - * This prototype will generate compilation warnings. - */ - ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") -+ZSTDLIB_STATIC_API - size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); - - /*! ZSTD_initCStream_usingCDict_advanced() : -- * This function is DEPRECATED, and is approximately equivalent to: -+ * This function is DEPRECATED, and is equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); -- * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. -- * for ((fParam, value) : fParams) { -- * ZSTD_CCtx_setParameter(zcs, fParam, value); -- * } -+ * ZSTD_CCtx_setFParams(zcs, fParams); - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * ZSTD_CCtx_refCDict(zcs, cdict); - * -@@ -2250,6 +2669,7 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_ - * This prototype will generate compilation warnings. - */ - ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") -+ZSTDLIB_STATIC_API - size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, - const ZSTD_CDict* cdict, - ZSTD_frameParameters fParams, -@@ -2264,7 +2684,7 @@ size_t ZSTD_initCStream_usingCDict_advan - * explicitly specified. - * - * start a new frame, using same parameters from previous frame. -- * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. -+ * This is typically useful to skip dictionary loading stage, since it will reuse it in-place. - * Note that zcs must be init at least once before using ZSTD_resetCStream(). - * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. - * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. -@@ -2274,6 +2694,7 @@ size_t ZSTD_initCStream_usingCDict_advan - * This prototype will generate compilation warnings. - */ - ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") -+ZSTDLIB_STATIC_API - size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); - - -@@ -2319,8 +2740,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_toFlushNo - * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); - * - * note: no dictionary will be used if dict == NULL or dictSize < 8 -- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -+ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_loadDictionary, see zstd.h for detailed instructions") - ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); - - /*! -@@ -2330,8 +2751,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_initDStre - * ZSTD_DCtx_refDDict(zds, ddict); - * - * note : ddict is referenced, it must outlive decompression session -- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -+ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_refDDict, see zstd.h for detailed instructions") - ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); - - /*! 
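
Putting the migration notes above together: the modern replacement for the deprecated ZSTD_initCStream_*() family is just the reset/parameter/dictionary sequence below (sketch only; `level`, `dict` and `pledgedSrcSize` are placeholders):

    static void start_stream(ZSTD_CStream *zcs, int level,
                             const void *dict, size_t dictSize,
                             unsigned long long pledgedSrcSize)
    {
        ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
        ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, level);
        ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); /* ZSTD_CONTENTSIZE_UNKNOWN if not known */
        ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);    /* pass NULL/0 for no dictionary */
        /* then drive the stream with ZSTD_compressStream2(..., ZSTD_e_continue / ZSTD_e_end) */
    }
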
-@@ -2339,18 +2760,202 @@ ZSTDLIB_STATIC_API size_t ZSTD_initDStre - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * -- * re-use decompression parameters from previous init; saves dictionary loading -- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x -+ * reuse decompression parameters from previous init; saves dictionary loading - */ -+ZSTD_DEPRECATED("use ZSTD_DCtx_reset, see zstd.h for detailed instructions") - ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); - - -+/* ********************* BLOCK-LEVEL SEQUENCE PRODUCER API ********************* -+ * -+ * *** OVERVIEW *** -+ * The Block-Level Sequence Producer API allows users to provide their own custom -+ * sequence producer which libzstd invokes to process each block. The produced list -+ * of sequences (literals and matches) is then post-processed by libzstd to produce -+ * valid compressed blocks. -+ * -+ * This block-level offload API is a more granular complement of the existing -+ * frame-level offload API compressSequences() (introduced in v1.5.1). It offers -+ * an easier migration story for applications already integrated with libzstd: the -+ * user application continues to invoke the same compression functions -+ * ZSTD_compress2() or ZSTD_compressStream2() as usual, and transparently benefits -+ * from the specific advantages of the external sequence producer. For example, -+ * the sequence producer could be tuned to take advantage of known characteristics -+ * of the input, to offer better speed / ratio, or could leverage hardware -+ * acceleration not available within libzstd itself. -+ * -+ * See contrib/externalSequenceProducer for an example program employing the -+ * Block-Level Sequence Producer API. -+ * -+ * *** USAGE *** -+ * The user is responsible for implementing a function of type -+ * ZSTD_sequenceProducer_F. For each block, zstd will pass the following -+ * arguments to the user-provided function: -+ * -+ * - sequenceProducerState: a pointer to a user-managed state for the sequence -+ * producer. -+ * -+ * - outSeqs, outSeqsCapacity: an output buffer for the sequence producer. -+ * outSeqsCapacity is guaranteed >= ZSTD_sequenceBound(srcSize). The memory -+ * backing outSeqs is managed by the CCtx. -+ * -+ * - src, srcSize: an input buffer for the sequence producer to parse. -+ * srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX. -+ * -+ * - dict, dictSize: a history buffer, which may be empty, which the sequence -+ * producer may reference as it parses the src buffer. Currently, zstd will -+ * always pass dictSize == 0 into external sequence producers, but this will -+ * change in the future. -+ * -+ * - compressionLevel: a signed integer representing the zstd compression level -+ * set by the user for the current operation. The sequence producer may choose -+ * to use this information to change its compression strategy and speed/ratio -+ * tradeoff. Note: the compression level does not reflect zstd parameters set -+ * through the advanced API. -+ * -+ * - windowSize: a size_t representing the maximum allowed offset for external -+ * sequences. Note that sequence offsets are sometimes allowed to exceed the -+ * windowSize if a dictionary is present, see doc/zstd_compression_format.md -+ * for details. -+ * -+ * The user-provided function shall return a size_t representing the number of -+ * sequences written to outSeqs. This return value will be treated as an error -+ * code if it is greater than outSeqsCapacity. 
The return value must be non-zero -+ * if srcSize is non-zero. The ZSTD_SEQUENCE_PRODUCER_ERROR macro is provided -+ * for convenience, but any value greater than outSeqsCapacity will be treated as -+ * an error code. -+ * -+ * If the user-provided function does not return an error code, the sequences -+ * written to outSeqs must be a valid parse of the src buffer. Data corruption may -+ * occur if the parse is not valid. A parse is defined to be valid if the -+ * following conditions hold: -+ * - The sum of matchLengths and literalLengths must equal srcSize. -+ * - All sequences in the parse, except for the final sequence, must have -+ * matchLength >= ZSTD_MINMATCH_MIN. The final sequence must have -+ * matchLength >= ZSTD_MINMATCH_MIN or matchLength == 0. -+ * - All offsets must respect the windowSize parameter as specified in -+ * doc/zstd_compression_format.md. -+ * - If the final sequence has matchLength == 0, it must also have offset == 0. -+ * -+ * zstd will only validate these conditions (and fail compression if they do not -+ * hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence -+ * validation has a performance cost. -+ * -+ * If the user-provided function returns an error, zstd will either fall back -+ * to an internal sequence producer or fail the compression operation. The user can -+ * choose between the two behaviors by setting the ZSTD_c_enableSeqProducerFallback -+ * cParam. Fallback compression will follow any other cParam settings, such as -+ * compression level, the same as in a normal compression operation. -+ * -+ * The user shall instruct zstd to use a particular ZSTD_sequenceProducer_F -+ * function by calling -+ * ZSTD_registerSequenceProducer(cctx, -+ * sequenceProducerState, -+ * sequenceProducer) -+ * This setting will persist until the next parameter reset of the CCtx. -+ * -+ * The sequenceProducerState must be initialized by the user before calling -+ * ZSTD_registerSequenceProducer(). The user is responsible for destroying the -+ * sequenceProducerState. -+ * -+ * *** LIMITATIONS *** -+ * This API is compatible with all zstd compression APIs which respect advanced parameters. -+ * However, there are three limitations: -+ * -+ * First, the ZSTD_c_enableLongDistanceMatching cParam is not currently supported. -+ * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with a block-level -+ * external sequence producer. -+ * - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in some -+ * cases (see its documentation for details). Users must explicitly set -+ * ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an external -+ * sequence producer is registered. -+ * - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default -+ * whenever ZSTD_c_windowLog < 128MB, but that's subject to change. Users should -+ * check the docs on ZSTD_c_enableLongDistanceMatching whenever the Block-Level Sequence -+ * Producer API is used in conjunction with advanced settings (like ZSTD_c_windowLog). -+ * -+ * Second, history buffers are not currently supported. Concretely, zstd will always pass -+ * dictSize == 0 to the external sequence producer (for now). This has two implications: -+ * - Dictionaries are not currently supported. Compression will *not* fail if the user -+ * references a dictionary, but the dictionary won't have any effect. -+ * - Stream history is not currently supported. 
All advanced compression APIs, including -+ * streaming APIs, work with external sequence producers, but each block is treated as -+ * an independent chunk without history from previous blocks. -+ * -+ * Third, multi-threading within a single compression is not currently supported. In other words, -+ * COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external sequence producer is registered. -+ * Multi-threading across compressions is fine: simply create one CCtx per thread. -+ * -+ * Long-term, we plan to overcome all three limitations. There is no technical blocker to -+ * overcoming them. It is purely a question of engineering effort. -+ */ -+ -+#define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1)) -+ -+typedef size_t (*ZSTD_sequenceProducer_F) ( -+ void* sequenceProducerState, -+ ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, -+ const void* src, size_t srcSize, -+ const void* dict, size_t dictSize, -+ int compressionLevel, -+ size_t windowSize -+); -+ -+/*! ZSTD_registerSequenceProducer() : -+ * Instruct zstd to use a block-level external sequence producer function. -+ * -+ * The sequenceProducerState must be initialized by the caller, and the caller is -+ * responsible for managing its lifetime. This parameter is sticky across -+ * compressions. It will remain set until the user explicitly resets compression -+ * parameters. -+ * -+ * Sequence producer registration is considered to be an "advanced parameter", -+ * part of the "advanced API". This means it will only have an effect on compression -+ * APIs which respect advanced parameters, such as compress2() and compressStream2(). -+ * Older compression APIs such as compressCCtx(), which predate the introduction of -+ * "advanced parameters", will ignore any external sequence producer setting. -+ * -+ * The sequence producer can be "cleared" by registering a NULL function pointer. This -+ * removes all limitations described above in the "LIMITATIONS" section of the API docs. -+ * -+ * The user is strongly encouraged to read the full API documentation (above) before -+ * calling this function. */ -+ZSTDLIB_STATIC_API void -+ZSTD_registerSequenceProducer( -+ ZSTD_CCtx* cctx, -+ void* sequenceProducerState, -+ ZSTD_sequenceProducer_F sequenceProducer -+); -+ -+/*! ZSTD_CCtxParams_registerSequenceProducer() : -+ * Same as ZSTD_registerSequenceProducer(), but operates on ZSTD_CCtx_params. -+ * This is used for accurate size estimation with ZSTD_estimateCCtxSize_usingCCtxParams(), -+ * which is needed when creating a ZSTD_CCtx with ZSTD_initStaticCCtx(). -+ * -+ * If you are using the external sequence producer API in a scenario where ZSTD_initStaticCCtx() -+ * is required, then this function is for you. Otherwise, you probably don't need it. -+ * -+ * See tests/zstreamtest.c for example usage. */ -+ZSTDLIB_STATIC_API void -+ZSTD_CCtxParams_registerSequenceProducer( -+ ZSTD_CCtx_params* params, -+ void* sequenceProducerState, -+ ZSTD_sequenceProducer_F sequenceProducer -+); -+ -+ - /* ******************************************************************* --* Buffer-less and synchronous inner streaming functions -+* Buffer-less and synchronous inner streaming functions (DEPRECATED) -+* -+* This API is deprecated, and will be removed in a future version. -+* It allows streaming (de)compression with user allocated buffers. -+* However, it is hard to use, and not as well tested as the rest of -+* our API. - * --* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. 
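
To make the ZSTD_sequenceProducer_F contract and ZSTD_registerSequenceProducer() registration documented above concrete, here is a minimal sketch of a producer that always defers to the internal parser; the function and helper names are illustrative, not part of the patch:

    /* A do-nothing producer: returning the error sentinel makes zstd fall back to
     * its internal match finder once ZSTD_c_enableSeqProducerFallback is enabled. */
    static size_t defer_to_internal(void *state,
                                    ZSTD_Sequence *outSeqs, size_t outSeqsCapacity,
                                    const void *src, size_t srcSize,
                                    const void *dict, size_t dictSize,
                                    int compressionLevel, size_t windowSize)
    {
        (void)state; (void)outSeqs; (void)outSeqsCapacity; (void)src; (void)srcSize;
        (void)dict; (void)dictSize; (void)compressionLevel; (void)windowSize;
        return ZSTD_SEQUENCE_PRODUCER_ERROR;
    }

    static void use_external_producer(ZSTD_CCtx *cctx)
    {
        /* Registration is sticky until the next parameter reset of the cctx. */
        ZSTD_registerSequenceProducer(cctx, NULL, defer_to_internal);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableSeqProducerFallback, 1);
    }
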
--* But it's also a complex one, with several restrictions, documented below. --* Prefer normal streaming API for an easier experience. -+* Please use the normal streaming API instead: ZSTD_compressStream2, -+* and ZSTD_decompressStream. -+* If there is functionality that you need, but it doesn't provide, -+* please open an issue on our GitHub. - ********************************************************************* */ - - /* -@@ -2358,11 +2963,10 @@ ZSTDLIB_STATIC_API size_t ZSTD_resetDStr - - A ZSTD_CCtx object is required to track streaming operations. - Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. -- ZSTD_CCtx object can be re-used multiple times within successive compression operations. -+ ZSTD_CCtx object can be reused multiple times within successive compression operations. - - Start by initializing a context. - Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression. -- It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() - - Then, consume your input using ZSTD_compressContinue(). - There are some important considerations to keep in mind when using this advanced function : -@@ -2380,39 +2984,49 @@ ZSTDLIB_STATIC_API size_t ZSTD_resetDStr - It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. - Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. - -- `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. -+ `ZSTD_CCtx` object can be reused (ZSTD_compressBegin()) to compress again. - */ - - /*===== Buffer-less streaming compression functions =====*/ -+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") - ZSTDLIB_STATIC_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); -+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") - ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); -+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") - ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*< note: fails if cdict==NULL */ --ZSTDLIB_STATIC_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ - -+ZSTD_DEPRECATED("This function will likely be removed in a future release. It is misleading and has very limited utility.") -+ZSTDLIB_STATIC_API -+size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ -+ -+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") - ZSTDLIB_STATIC_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. 
See docs.") - ZSTDLIB_STATIC_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - - /* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */ - ZSTD_DEPRECATED("use advanced API to access custom parameters") -+ZSTDLIB_STATIC_API - size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ - ZSTD_DEPRECATED("use advanced API to access custom parameters") -+ZSTDLIB_STATIC_API - size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ - /* - Buffer-less streaming decompression (synchronous mode) - - A ZSTD_DCtx object is required to track streaming operations. - Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. -- A ZSTD_DCtx object can be re-used multiple times. -+ A ZSTD_DCtx object can be reused multiple times. - - First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). - Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. - Data fragment must be large enough to ensure successful decoding. - `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. -- @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. -- >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. -+ result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. -+ >0 : `srcSize` is too small, please provide at least result bytes on next attempt. - errorCode, which can be tested using ZSTD_isError(). - -- It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, -+ It fills a ZSTD_FrameHeader structure with important information to correctly decode the frame, - such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). - Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. - As a consequence, check that values remain within valid application range. -@@ -2428,7 +3042,7 @@ size_t ZSTD_compressBegin_usingCDict_adv - - The most memory efficient way is to use a round buffer of sufficient size. - Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), -- which can @return an error code if required value is too large for current system (in 32-bits mode). -+ which can return an error code if required value is too large for current system (in 32-bits mode). - In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, - up to the moment there is not enough room left in the buffer to guarantee decoding another full block, - which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. -@@ -2448,7 +3062,7 @@ size_t ZSTD_compressBegin_usingCDict_adv - ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
- ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. - -- @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). -+ result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). - It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. - It can also be an error code, which can be tested with ZSTD_isError(). - -@@ -2471,27 +3085,7 @@ size_t ZSTD_compressBegin_usingCDict_adv - */ - - /*===== Buffer-less streaming decompression functions =====*/ --typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; --typedef struct { -- unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ -- unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ -- unsigned blockSizeMax; -- ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ -- unsigned headerSize; -- unsigned dictID; -- unsigned checksumFlag; --} ZSTD_frameHeader; - --/*! ZSTD_getFrameHeader() : -- * decode Frame Header, or requires larger `srcSize`. -- * @return : 0, `zfhPtr` is correctly filled, -- * >0, `srcSize` is too small, value is wanted `srcSize` amount, -- * or an error code, which can be tested using ZSTD_isError() */ --ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /*< doesn't consume input */ --/*! ZSTD_getFrameHeader_advanced() : -- * same as ZSTD_getFrameHeader(), -- * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ --ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); - ZSTDLIB_STATIC_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /*< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ - - ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); -@@ -2502,6 +3096,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_nextSrcSi - ZSTDLIB_STATIC_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - - /* misc */ -+ZSTD_DEPRECATED("This function will likely be removed in the next minor release. It is misleading and has very limited utility.") - ZSTDLIB_STATIC_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); - typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; - ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); -@@ -2509,11 +3104,23 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e - - - --/* ============================ */ --/* Block level API */ --/* ============================ */ -+/* ========================================= */ -+/* Block level API (DEPRECATED) */ -+/* ========================================= */ - - /*! -+ -+ This API is deprecated in favor of the regular compression API. -+ You can get the frame header down to 2 bytes by setting: -+ - ZSTD_c_format = ZSTD_f_zstd1_magicless -+ - ZSTD_c_contentSizeFlag = 0 -+ - ZSTD_c_checksumFlag = 0 -+ - ZSTD_c_dictIDFlag = 0 -+ -+ This API is not as well tested as our normal API, so we recommend not using it. -+ We will be removing it in a future version. 
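
A small illustration of the migration path suggested in the deprecation note above: the four parameters it lists map directly onto the regular one-shot API. This is a sketch against the upstream userspace zstd API rather than the kernel wrapper; ZSTD_c_format / ZSTD_f_zstd1_magicless are experimental parameters that require ZSTD_STATIC_LINKING_ONLY, and error checking of the setters is omitted for brevity.

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    /* Produce a frame with a ~2-byte header: magicless format, no stored
     * content size, no checksum, no dictionary ID (the combination named
     * in the deprecation note above). Returns a compressed size or a zstd
     * error code; the caller owns the ZSTD_CCtx. */
    static size_t compress_minimal_header(ZSTD_CCtx* cctx,
                                          void* dst, size_t dstCapacity,
                                          const void* src, size_t srcSize)
    {
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_dictIDFlag, 0);
        return ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
    }

The decoder has to opt into the same format, e.g. ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless), since a magicless frame can no longer be auto-detected.
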
If the normal API doesn't provide -+ the functionality you need, please open a GitHub issue. -+ - Block functions produce and decode raw zstd blocks, without frame metadata. - Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). - But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. -@@ -2524,7 +3131,6 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e - - It is necessary to init context before starting - + compression : any ZSTD_compressBegin*() variant, including with dictionary - + decompression : any ZSTD_decompressBegin*() variant, including with dictionary -- + copyCCtx() and copyDCtx() can be used too - - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB - + If input is larger than a block size, it's necessary to split input data into multiple blocks - + For inputs larger than a single block, consider using regular ZSTD_compress() instead. -@@ -2541,11 +3147,14 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e - */ - - /*===== Raw zstd block functions =====*/ -+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") - ZSTDLIB_STATIC_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); -+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") - ZSTDLIB_STATIC_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") - ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") - ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /*< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ - - - #endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ -- ---- a/lib/zstd/Makefile -+++ b/lib/zstd/Makefile -@@ -1,6 +1,6 @@ - # SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - # ################################################################ --# Copyright (c) Facebook, Inc. -+# Copyright (c) Meta Platforms, Inc. and affiliates. - # All rights reserved. - # - # This source code is licensed under both the BSD-style license (found in the -@@ -26,6 +26,7 @@ zstd_compress-y := \ - compress/zstd_lazy.o \ - compress/zstd_ldm.o \ - compress/zstd_opt.o \ -+ compress/zstd_preSplit.o \ - - zstd_decompress-y := \ - zstd_decompress_module.o \ ---- /dev/null -+++ b/lib/zstd/common/allocations.h -@@ -0,0 +1,56 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ -+/* -+ * Copyright (c) Meta Platforms, Inc. and affiliates. -+ * All rights reserved. -+ * -+ * This source code is licensed under both the BSD-style license (found in the -+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found -+ * in the COPYING file in the root directory of this source tree). -+ * You may select, at your option, one of the above-listed licenses. 
-+ */ -+ -+/* This file provides custom allocation primitives -+ */ -+ -+#define ZSTD_DEPS_NEED_MALLOC -+#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */ -+ -+#include "compiler.h" /* MEM_STATIC */ -+#define ZSTD_STATIC_LINKING_ONLY -+#include /* ZSTD_customMem */ -+ -+#ifndef ZSTD_ALLOCATIONS_H -+#define ZSTD_ALLOCATIONS_H -+ -+/* custom memory allocation functions */ -+ -+MEM_STATIC void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) -+{ -+ if (customMem.customAlloc) -+ return customMem.customAlloc(customMem.opaque, size); -+ return ZSTD_malloc(size); -+} -+ -+MEM_STATIC void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) -+{ -+ if (customMem.customAlloc) { -+ /* calloc implemented as malloc+memset; -+ * not as efficient as calloc, but next best guess for custom malloc */ -+ void* const ptr = customMem.customAlloc(customMem.opaque, size); -+ ZSTD_memset(ptr, 0, size); -+ return ptr; -+ } -+ return ZSTD_calloc(1, size); -+} -+ -+MEM_STATIC void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) -+{ -+ if (ptr!=NULL) { -+ if (customMem.customFree) -+ customMem.customFree(customMem.opaque, ptr); -+ else -+ ZSTD_free(ptr); -+ } -+} -+ -+#endif /* ZSTD_ALLOCATIONS_H */ ---- /dev/null -+++ b/lib/zstd/common/bits.h -@@ -0,0 +1,150 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ -+/* -+ * Copyright (c) Meta Platforms, Inc. and affiliates. -+ * All rights reserved. -+ * -+ * This source code is licensed under both the BSD-style license (found in the -+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found -+ * in the COPYING file in the root directory of this source tree). -+ * You may select, at your option, one of the above-listed licenses. -+ */ -+ -+#ifndef ZSTD_BITS_H -+#define ZSTD_BITS_H -+ -+#include "mem.h" -+ -+MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val) -+{ -+ assert(val != 0); -+ { -+ static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3, -+ 30, 22, 20, 15, 25, 17, 4, 8, -+ 31, 27, 13, 23, 21, 19, 16, 7, -+ 26, 12, 18, 6, 11, 5, 10, 9}; -+ return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27]; -+ } -+} -+ -+MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val) -+{ -+ assert(val != 0); -+#if (__GNUC__ >= 4) -+ return (unsigned)__builtin_ctz(val); -+#else -+ return ZSTD_countTrailingZeros32_fallback(val); -+#endif -+} -+ -+MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) -+{ -+ assert(val != 0); -+ { -+ static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29, -+ 11, 14, 16, 18, 22, 25, 3, 30, -+ 8, 12, 20, 28, 15, 17, 24, 7, -+ 19, 27, 23, 6, 26, 5, 4, 31}; -+ val |= val >> 1; -+ val |= val >> 2; -+ val |= val >> 4; -+ val |= val >> 8; -+ val |= val >> 16; -+ return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27]; -+ } -+} -+ -+MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val) -+{ -+ assert(val != 0); -+#if (__GNUC__ >= 4) -+ return (unsigned)__builtin_clz(val); -+#else -+ return ZSTD_countLeadingZeros32_fallback(val); -+#endif -+} -+ -+MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val) -+{ -+ assert(val != 0); -+#if (__GNUC__ >= 4) && defined(__LP64__) -+ return (unsigned)__builtin_ctzll(val); -+#else -+ { -+ U32 mostSignificantWord = (U32)(val >> 32); -+ U32 leastSignificantWord = (U32)val; -+ if (leastSignificantWord == 0) { -+ return 32 + ZSTD_countTrailingZeros32(mostSignificantWord); -+ } else { -+ return ZSTD_countTrailingZeros32(leastSignificantWord); -+ } -+ } -+#endif -+} -+ -+MEM_STATIC unsigned 
ZSTD_countLeadingZeros64(U64 val) -+{ -+ assert(val != 0); -+#if (__GNUC__ >= 4) -+ return (unsigned)(__builtin_clzll(val)); -+#else -+ { -+ U32 mostSignificantWord = (U32)(val >> 32); -+ U32 leastSignificantWord = (U32)val; -+ if (mostSignificantWord == 0) { -+ return 32 + ZSTD_countLeadingZeros32(leastSignificantWord); -+ } else { -+ return ZSTD_countLeadingZeros32(mostSignificantWord); -+ } -+ } -+#endif -+} -+ -+MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val) -+{ -+ if (MEM_isLittleEndian()) { -+ if (MEM_64bits()) { -+ return ZSTD_countTrailingZeros64((U64)val) >> 3; -+ } else { -+ return ZSTD_countTrailingZeros32((U32)val) >> 3; -+ } -+ } else { /* Big Endian CPU */ -+ if (MEM_64bits()) { -+ return ZSTD_countLeadingZeros64((U64)val) >> 3; -+ } else { -+ return ZSTD_countLeadingZeros32((U32)val) >> 3; -+ } -+ } -+} -+ -+MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ -+{ -+ assert(val != 0); -+ return 31 - ZSTD_countLeadingZeros32(val); -+} -+ -+/* ZSTD_rotateRight_*(): -+ * Rotates a bitfield to the right by "count" bits. -+ * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts -+ */ -+MEM_STATIC -+U64 ZSTD_rotateRight_U64(U64 const value, U32 count) { -+ assert(count < 64); -+ count &= 0x3F; /* for fickle pattern recognition */ -+ return (value >> count) | (U64)(value << ((0U - count) & 0x3F)); -+} -+ -+MEM_STATIC -+U32 ZSTD_rotateRight_U32(U32 const value, U32 count) { -+ assert(count < 32); -+ count &= 0x1F; /* for fickle pattern recognition */ -+ return (value >> count) | (U32)(value << ((0U - count) & 0x1F)); -+} -+ -+MEM_STATIC -+U16 ZSTD_rotateRight_U16(U16 const value, U32 count) { -+ assert(count < 16); -+ count &= 0x0F; /* for fickle pattern recognition */ -+ return (value >> count) | (U16)(value << ((0U - count) & 0x0F)); -+} -+ -+#endif /* ZSTD_BITS_H */ ---- a/lib/zstd/common/bitstream.h -+++ b/lib/zstd/common/bitstream.h -@@ -1,7 +1,8 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* ****************************************************************** - * bitstream - * Part of FSE library -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy -@@ -27,7 +28,7 @@ - #include "compiler.h" /* UNLIKELY() */ - #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ - #include "error_private.h" /* error codes and messages */ -- -+#include "bits.h" /* ZSTD_highbit32 */ - - /*========================================= - * Target specific -@@ -41,12 +42,13 @@ - /*-****************************************** - * bitStream encoding API (write forward) - ********************************************/ -+typedef size_t BitContainerType; - /* bitStream can mix input from multiple sources. - * A critical property of these streams is that they encode and decode in **reverse** direction. - * So the first bit sequence you add will be the last to be read, like a LIFO stack. 
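
For readers unfamiliar with the De Bruijn fallbacks added in bits.h above, here is a standalone copy of the trailing-zero variant with a brute-force check against the compiler builtin. It is illustrative only (plain C, GCC/Clang assumed for __builtin_ctz) and is not part of the kernel build.

    #include <assert.h>
    #include <stdint.h>

    /* (val & -val) isolates the lowest set bit; multiplying by the De Bruijn
     * constant 0x077CB531 places a unique 5-bit index in the top bits, which
     * the table maps back to the position of that bit. */
    static unsigned ctz32_debruijn(uint32_t val)
    {
        static const unsigned tbl[32] = {
            0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
            31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
        };
        assert(val != 0);
        return tbl[((val & (0u - val)) * 0x077CB531u) >> 27];
    }

    int main(void)
    {
        uint32_t v;
        for (v = 1; v < (1u << 20); v++)
            assert(ctz32_debruijn(v) == (unsigned)__builtin_ctz(v));
        return 0;
    }
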
- */ - typedef struct { -- size_t bitContainer; -+ BitContainerType bitContainer; - unsigned bitPos; - char* startPtr; - char* ptr; -@@ -54,7 +56,7 @@ typedef struct { - } BIT_CStream_t; - - MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); --MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); -+MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits); - MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); - MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); - -@@ -63,7 +65,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_C - * `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. - * - * bits are first added to a local register. --* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. -+* Local register is BitContainerType, 64-bits on 64-bits systems, or 32-bits on 32-bits systems. - * Writing data into memory is an explicit operation, performed by the flushBits function. - * Hence keep track how many bits are potentially stored into local register to avoid register overflow. - * After a flushBits, a maximum of 7 bits might still be stored into local register. -@@ -80,28 +82,28 @@ MEM_STATIC size_t BIT_closeCStream(BIT_C - * bitStream decoding API (read backward) - **********************************************/ - typedef struct { -- size_t bitContainer; -+ BitContainerType bitContainer; - unsigned bitsConsumed; - const char* ptr; - const char* start; - const char* limitPtr; - } BIT_DStream_t; - --typedef enum { BIT_DStream_unfinished = 0, -- BIT_DStream_endOfBuffer = 1, -- BIT_DStream_completed = 2, -- BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ -- /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ -+typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */ -+ BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */ -+ BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */ -+ BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */ -+ } BIT_DStream_status; /* result of BIT_reloadDStream() */ - - MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); --MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); -+MEM_STATIC BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); - MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); - MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); - - - /* Start by invoking BIT_initDStream(). - * A chunk of the bitStream is then stored into a local register. --* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). -+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType). - * You can then retrieve bitFields stored into the local register, **in reverse order**. - * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. - * A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. 
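
The decoder-side usage comment above condenses into a short pattern. The sketch below leans on the internal bitstream.h helpers from this hunk (BIT_initDStream, BIT_readBits, BIT_reloadDStream); they are not exported symbols, so this is illustrative only, and the two field widths are invented for the example.

    /* Reads back two fields that were written as
     *     BIT_addBits(&bitC, a, 5); BIT_addBits(&bitC, b, 11);
     * Remember the LIFO property: the last field written is read first. */
    static size_t read_two_fields(const void* src, size_t srcSize,
                                  unsigned* a5, unsigned* b11)
    {
        BIT_DStream_t bitD;
        size_t const initResult = BIT_initDStream(&bitD, src, srcSize);
        if (ERR_isError(initResult)) return initResult;

        *b11 = (unsigned)BIT_readBits(&bitD, 11);   /* written last, read first */
        *a5  = (unsigned)BIT_readBits(&bitD, 5);

        /* Refill before reading further fields; overflow signals corruption. */
        if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow)
            return ERROR(corruption_detected);
        /* BIT_endOfDStream(&bitD) can then be used to verify the stream was
         * consumed exactly. */
        return 0;
    }
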
-@@ -113,7 +115,7 @@ MEM_STATIC unsigned BIT_endOfDStream(con - /*-**************************************** - * unsafe API - ******************************************/ --MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); -+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits); - /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ - - MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); -@@ -122,33 +124,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CS - MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); - /* faster, but works only if nbBits >= 1 */ - -- -- --/*-************************************************************** --* Internal functions --****************************************************************/ --MEM_STATIC unsigned BIT_highbit32 (U32 val) --{ -- assert(val != 0); -- { --# if (__GNUC__ >= 3) /* Use GCC Intrinsic */ -- return __builtin_clz (val) ^ 31; --# else /* Software version */ -- static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, -- 11, 14, 16, 18, 22, 25, 3, 30, -- 8, 12, 20, 28, 15, 17, 24, 7, -- 19, 27, 23, 6, 26, 5, 4, 31 }; -- U32 v = val; -- v |= v >> 1; -- v |= v >> 2; -- v |= v >> 4; -- v |= v >> 8; -- v |= v >> 16; -- return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; --# endif -- } --} -- - /*===== Local Constants =====*/ - static const unsigned BIT_mask[] = { - 0, 1, 3, 7, 0xF, 0x1F, -@@ -178,16 +153,22 @@ MEM_STATIC size_t BIT_initCStream(BIT_CS - return 0; - } - -+FORCE_INLINE_TEMPLATE BitContainerType BIT_getLowerBits(BitContainerType bitContainer, U32 const nbBits) -+{ -+ assert(nbBits < BIT_MASK_SIZE); -+ return bitContainer & BIT_mask[nbBits]; -+} -+ - /*! BIT_addBits() : - * can add up to 31 bits into `bitC`. - * Note : does not check for register overflow ! */ - MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, -- size_t value, unsigned nbBits) -+ BitContainerType value, unsigned nbBits) - { - DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); - assert(nbBits < BIT_MASK_SIZE); - assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); -- bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; -+ bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos; - bitC->bitPos += nbBits; - } - -@@ -195,7 +176,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_ - * works only if `value` is _clean_, - * meaning all high bits above nbBits are 0 */ - MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, -- size_t value, unsigned nbBits) -+ BitContainerType value, unsigned nbBits) - { - assert((value>>nbBits) == 0); - assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); -@@ -242,7 +223,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_C - BIT_addBitsFast(bitC, 1, 1); /* endMark */ - BIT_flushBits(bitC); - if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ -- return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); -+ return (size_t)(bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); - } - - -@@ -266,35 +247,35 @@ MEM_STATIC size_t BIT_initDStream(BIT_DS - bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); - bitD->bitContainer = MEM_readLEST(bitD->ptr); - { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; -- bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ -+ bitD->bitsConsumed = lastByte ? 
8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ - if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } - } else { - bitD->ptr = bitD->start; - bitD->bitContainer = *(const BYTE*)(bitD->start); - switch(srcSize) - { -- case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); -+ case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); - ZSTD_FALLTHROUGH; - -- case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); -+ case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); - ZSTD_FALLTHROUGH; - -- case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); -+ case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); - ZSTD_FALLTHROUGH; - -- case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; -+ case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24; - ZSTD_FALLTHROUGH; - -- case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; -+ case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16; - ZSTD_FALLTHROUGH; - -- case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; -+ case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8; - ZSTD_FALLTHROUGH; - - default: break; - } - { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; -- bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; -+ bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; - if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ - } - bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; -@@ -303,12 +284,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DS - return srcSize; - } - --MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start) -+FORCE_INLINE_TEMPLATE BitContainerType BIT_getUpperBits(BitContainerType bitContainer, U32 const start) - { - return bitContainer >> start; - } - --MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) -+FORCE_INLINE_TEMPLATE BitContainerType BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits) - { - U32 const regMask = sizeof(bitContainer)*8 - 1; - /* if start > regMask, bitstream is corrupted, and result is undefined */ -@@ -318,26 +299,20 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_ - * such cpus old (pre-Haswell, 2013) and their performance is not of that - * importance. - */ --#if defined(__x86_64__) || defined(_M_X86) -+#if defined(__x86_64__) || defined(_M_X64) - return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1); - #else - return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; - #endif - } - --MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) --{ -- assert(nbBits < BIT_MASK_SIZE); -- return bitContainer & BIT_mask[nbBits]; --} -- - /*! BIT_lookBits() : - * Provides next n bits from local register. - * local register is not modified. - * On 32-bits, maxNbBits==24. - * On 64-bits, maxNbBits==56. 
- * @return : value extracted */ --MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) -+FORCE_INLINE_TEMPLATE BitContainerType BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) - { - /* arbitrate between double-shift and shift+mask */ - #if 1 -@@ -353,14 +328,14 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT - - /*! BIT_lookBitsFast() : - * unsafe version; only works if nbBits >= 1 */ --MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) -+MEM_STATIC BitContainerType BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) - { - U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; - assert(nbBits >= 1); - return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); - } - --MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) -+FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) - { - bitD->bitsConsumed += nbBits; - } -@@ -369,23 +344,38 @@ MEM_STATIC FORCE_INLINE_ATTR void BIT_sk - * Read (consume) next n bits from local register and update. - * Pay attention to not read more than nbBits contained into local register. - * @return : extracted value. */ --MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) -+FORCE_INLINE_TEMPLATE BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) - { -- size_t const value = BIT_lookBits(bitD, nbBits); -+ BitContainerType const value = BIT_lookBits(bitD, nbBits); - BIT_skipBits(bitD, nbBits); - return value; - } - - /*! BIT_readBitsFast() : -- * unsafe version; only works only if nbBits >= 1 */ --MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) -+ * unsafe version; only works if nbBits >= 1 */ -+MEM_STATIC BitContainerType BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) - { -- size_t const value = BIT_lookBitsFast(bitD, nbBits); -+ BitContainerType const value = BIT_lookBitsFast(bitD, nbBits); - assert(nbBits >= 1); - BIT_skipBits(bitD, nbBits); - return value; - } - -+/*! BIT_reloadDStream_internal() : -+ * Simple variant of BIT_reloadDStream(), with two conditions: -+ * 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8 -+ * 2. look window is valid after shifted down : bitD->ptr >= bitD->start -+ */ -+MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD) -+{ -+ assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); -+ bitD->ptr -= bitD->bitsConsumed >> 3; -+ assert(bitD->ptr >= bitD->start); -+ bitD->bitsConsumed &= 7; -+ bitD->bitContainer = MEM_readLEST(bitD->ptr); -+ return BIT_DStream_unfinished; -+} -+ - /*! BIT_reloadDStreamFast() : - * Similar to BIT_reloadDStream(), but with two differences: - * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! -@@ -396,31 +386,35 @@ MEM_STATIC BIT_DStream_status BIT_reload - { - if (UNLIKELY(bitD->ptr < bitD->limitPtr)) - return BIT_DStream_overflow; -- assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); -- bitD->ptr -= bitD->bitsConsumed >> 3; -- bitD->bitsConsumed &= 7; -- bitD->bitContainer = MEM_readLEST(bitD->ptr); -- return BIT_DStream_unfinished; -+ return BIT_reloadDStream_internal(bitD); - } - - /*! BIT_reloadDStream() : - * Refill `bitD` from buffer previously set in BIT_initDStream() . -- * This function is safe, it guarantees it will not read beyond src buffer. -+ * This function is safe, it guarantees it will not never beyond src buffer. - * @return : status of `BIT_DStream_t` internal register. 
- * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ --MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) -+FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) - { -- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ -+ /* note : once in overflow mode, a bitstream remains in this mode until it's reset */ -+ if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) { -+ static const BitContainerType zeroFilled = 0; -+ bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */ -+ /* overflow detected, erroneous scenario or end of stream: no update */ - return BIT_DStream_overflow; -+ } -+ -+ assert(bitD->ptr >= bitD->start); - - if (bitD->ptr >= bitD->limitPtr) { -- return BIT_reloadDStreamFast(bitD); -+ return BIT_reloadDStream_internal(bitD); - } - if (bitD->ptr == bitD->start) { -+ /* reached end of bitStream => no update */ - if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; - return BIT_DStream_completed; - } -- /* start < ptr < limitPtr */ -+ /* start < ptr < limitPtr => cautious update */ - { U32 nbBytes = bitD->bitsConsumed >> 3; - BIT_DStream_status result = BIT_DStream_unfinished; - if (bitD->ptr - nbBytes < bitD->start) { -@@ -442,5 +436,4 @@ MEM_STATIC unsigned BIT_endOfDStream(con - return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); - } - -- - #endif /* BITSTREAM_H_MODULE */ ---- a/lib/zstd/common/compiler.h -+++ b/lib/zstd/common/compiler.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -11,6 +12,8 @@ - #ifndef ZSTD_COMPILER_H - #define ZSTD_COMPILER_H - -+#include -+ - #include "portability_macros.h" - - /*-******************************************************* -@@ -41,12 +44,15 @@ - */ - #define WIN_CDECL - -+/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ -+#define UNUSED_ATTR __attribute__((unused)) -+ - /* - * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant - * parameters. They must be inlined for the compiler to eliminate the constant - * branches. - */ --#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR -+#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR UNUSED_ATTR - /* - * HINT_INLINE is used to help the compiler generate better code. It is *not* - * used for "templates", so it can be tweaked based on the compilers -@@ -61,11 +67,21 @@ - #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 - # define HINT_INLINE static INLINE_KEYWORD - #else --# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR -+# define HINT_INLINE FORCE_INLINE_TEMPLATE - #endif - --/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ --#define UNUSED_ATTR __attribute__((unused)) -+/* "soft" inline : -+ * The compiler is free to select if it's a good idea to inline or not. -+ * The main objective is to silence compiler warnings -+ * when a defined function in included but not used. -+ * -+ * Note : this macro is prefixed `MEM_` because it used to be provided by `mem.h` unit. 
-+ * Updating the prefix is probably preferable, but requires a fairly large codemod, -+ * since this name is used everywhere. -+ */ -+#ifndef MEM_STATIC /* already defined in Linux Kernel mem.h */ -+#define MEM_STATIC static __inline UNUSED_ATTR -+#endif - - /* force no inlining */ - #define FORCE_NOINLINE static __attribute__((__noinline__)) -@@ -86,23 +102,24 @@ - # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) - # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) - #elif defined(__aarch64__) --# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) --# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) -+# define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0) -+# define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0) - #else --# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ --# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ -+# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */ -+# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */ - #endif /* NO_PREFETCH */ - - #define CACHELINE_SIZE 64 - --#define PREFETCH_AREA(p, s) { \ -- const char* const _ptr = (const char*)(p); \ -- size_t const _size = (size_t)(s); \ -- size_t _pos; \ -- for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ -- PREFETCH_L2(_ptr + _pos); \ -- } \ --} -+#define PREFETCH_AREA(p, s) \ -+ do { \ -+ const char* const _ptr = (const char*)(p); \ -+ size_t const _size = (size_t)(s); \ -+ size_t _pos; \ -+ for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ -+ PREFETCH_L2(_ptr + _pos); \ -+ } \ -+ } while (0) - - /* vectorization - * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax, -@@ -126,16 +143,13 @@ - #define UNLIKELY(x) (__builtin_expect((x), 0)) - - #if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) --# define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); } -+# define ZSTD_UNREACHABLE do { assert(0), __builtin_unreachable(); } while (0) - #else --# define ZSTD_UNREACHABLE { assert(0); } -+# define ZSTD_UNREACHABLE do { assert(0); } while (0) - #endif - - /* disable warnings */ - --/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/ -- -- - /* compile time determination of SIMD support */ - - /* C-language Attributes are added in C23. */ -@@ -158,9 +172,15 @@ - #define ZSTD_FALLTHROUGH fallthrough - - /*-************************************************************** --* Alignment check -+* Alignment - *****************************************************************/ - -+/* @return 1 if @u is a 2^n value, 0 otherwise -+ * useful to check a value is valid for alignment restrictions */ -+MEM_STATIC int ZSTD_isPower2(size_t u) { -+ return (u & (u-1)) == 0; -+} -+ - /* this test was initially positioned in mem.h, - * but this file is removed (or replaced) for linux kernel - * so it's now hosted in compiler.h, -@@ -175,10 +195,95 @@ - - #endif /* ZSTD_ALIGNOF */ - -+#ifndef ZSTD_ALIGNED -+/* C90-compatible alignment macro (GCC/Clang). Adjust for other compilers if needed. 
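
Several hunks in this header (PREFETCH_L1/L2, PREFETCH_AREA, ZSTD_UNREACHABLE, and later the DEBUGLOG and RETURN_ERROR families) rewrap multi-statement macros in do { ... } while (0). The reason is the usual macro-hygiene one; a minimal, self-contained illustration with hypothetical macro names, not code from the patch:

    #include <stdio.h>

    /* Brace-only body: the ';' the caller writes after the macro terminates
     * the if-statement, so a following 'else' has nothing to bind to. */
    #define LOG_BRACES(x)   { printf("val="); printf("%d\n", (x)); }

    /* do/while(0) body: expands to exactly one statement and still demands
     * the trailing ';', so it composes cleanly with if/else. */
    #define LOG_DOWHILE(x)  do { printf("val="); printf("%d\n", (x)); } while (0)

    void example(int cond, int v)
    {
        if (cond)
            LOG_DOWHILE(v);
        else
            LOG_DOWHILE(-v);
        /* Replacing LOG_DOWHILE with LOG_BRACES here does not compile:
         * the expansion becomes '{ ... } ; else { ... }' and the stray ';'
         * orphans the 'else'. */
    }
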
*/ -+#define ZSTD_ALIGNED(a) __attribute__((aligned(a))) -+#endif /* ZSTD_ALIGNED */ -+ -+ - /*-************************************************************** - * Sanitizer - *****************************************************************/ - -+/* -+ * Zstd relies on pointer overflow in its decompressor. -+ * We add this attribute to functions that rely on pointer overflow. -+ */ -+#ifndef ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+# if __has_attribute(no_sanitize) -+# if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8 -+ /* gcc < 8 only has signed-integer-overlow which triggers on pointer overflow */ -+# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("signed-integer-overflow"))) -+# else -+ /* older versions of clang [3.7, 5.0) will warn that pointer-overflow is ignored. */ -+# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("pointer-overflow"))) -+# endif -+# else -+# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+# endif -+#endif -+ -+/* -+ * Helper function to perform a wrapped pointer difference without triggering -+ * UBSAN. -+ * -+ * @returns lhs - rhs with wrapping -+ */ -+MEM_STATIC -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+ptrdiff_t ZSTD_wrappedPtrDiff(unsigned char const* lhs, unsigned char const* rhs) -+{ -+ return lhs - rhs; -+} -+ -+/* -+ * Helper function to perform a wrapped pointer add without triggering UBSAN. -+ * -+ * @return ptr + add with wrapping -+ */ -+MEM_STATIC -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+unsigned char const* ZSTD_wrappedPtrAdd(unsigned char const* ptr, ptrdiff_t add) -+{ -+ return ptr + add; -+} -+ -+/* -+ * Helper function to perform a wrapped pointer subtraction without triggering -+ * UBSAN. -+ * -+ * @return ptr - sub with wrapping -+ */ -+MEM_STATIC -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+unsigned char const* ZSTD_wrappedPtrSub(unsigned char const* ptr, ptrdiff_t sub) -+{ -+ return ptr - sub; -+} -+ -+/* -+ * Helper function to add to a pointer that works around C's undefined behavior -+ * of adding 0 to NULL. -+ * -+ * @returns `ptr + add` except it defines `NULL + 0 == NULL`. -+ */ -+MEM_STATIC -+unsigned char* ZSTD_maybeNullPtrAdd(unsigned char* ptr, ptrdiff_t add) -+{ -+ return add > 0 ? ptr + add : ptr; -+} -+ -+/* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an -+ * abundance of caution, disable our custom poisoning on mingw. */ -+#ifdef __MINGW32__ -+#ifndef ZSTD_ASAN_DONT_POISON_WORKSPACE -+#define ZSTD_ASAN_DONT_POISON_WORKSPACE 1 -+#endif -+#ifndef ZSTD_MSAN_DONT_POISON_WORKSPACE -+#define ZSTD_MSAN_DONT_POISON_WORKSPACE 1 -+#endif -+#endif -+ - - - #endif /* ZSTD_COMPILER_H */ ---- a/lib/zstd/common/cpu.h -+++ b/lib/zstd/common/cpu.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the ---- a/lib/zstd/common/debug.c -+++ b/lib/zstd/common/debug.c -@@ -1,7 +1,8 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* ****************************************************************** - * debug - * Part of FSE library -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. 
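
The ZSTD_wrappedPtr*() and ZSTD_maybeNullPtrAdd() helpers above exist only to keep UBSAN quiet about pointer arithmetic the decoder depends on; in particular, NULL + 0 is undefined behaviour in C even though every real target treats it as a no-op. A hedged sketch of the calling pattern (the wrapper function is hypothetical, not taken from the patch):

    #include <stddef.h>

    /* 'op' may legitimately be NULL when the destination capacity is 0
     * (e.g. probing for the required output size). A plain 'op + produced'
     * would be UB for NULL + 0; ZSTD_maybeNullPtrAdd() defines it as a no-op. */
    static unsigned char* advance_output(unsigned char* op, size_t produced)
    {
        return ZSTD_maybeNullPtrAdd(op, (ptrdiff_t)produced);
    }
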
- * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy -@@ -21,4 +22,10 @@ - - #include "debug.h" - -+#if (DEBUGLEVEL>=2) -+/* We only use this when DEBUGLEVEL>=2, but we get -Werror=pedantic errors if a -+ * translation unit is empty. So remove this from Linux kernel builds, but -+ * otherwise just leave it in. -+ */ - int g_debuglevel = DEBUGLEVEL; -+#endif ---- a/lib/zstd/common/debug.h -+++ b/lib/zstd/common/debug.h -@@ -1,7 +1,8 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* ****************************************************************** - * debug - * Part of FSE library -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy -@@ -33,7 +34,6 @@ - #define DEBUG_H_12987983217 - - -- - /* static assert is triggered at compile time, leaving no runtime artefact. - * static assert only works with compile-time constants. - * Also, this variant can only be used inside a function. */ -@@ -82,20 +82,27 @@ extern int g_debuglevel; /* the variable - It's useful when enabling very verbose levels - on selective conditions (such as position in src) */ - --# define RAWLOG(l, ...) { \ -- if (l<=g_debuglevel) { \ -- ZSTD_DEBUG_PRINT(__VA_ARGS__); \ -- } } --# define DEBUGLOG(l, ...) { \ -- if (l<=g_debuglevel) { \ -- ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \ -- ZSTD_DEBUG_PRINT(" \n"); \ -- } } -+# define RAWLOG(l, ...) \ -+ do { \ -+ if (l<=g_debuglevel) { \ -+ ZSTD_DEBUG_PRINT(__VA_ARGS__); \ -+ } \ -+ } while (0) -+ -+#define STRINGIFY(x) #x -+#define TOSTRING(x) STRINGIFY(x) -+#define LINE_AS_STRING TOSTRING(__LINE__) -+ -+# define DEBUGLOG(l, ...) \ -+ do { \ -+ if (l<=g_debuglevel) { \ -+ ZSTD_DEBUG_PRINT(__FILE__ ":" LINE_AS_STRING ": " __VA_ARGS__); \ -+ ZSTD_DEBUG_PRINT(" \n"); \ -+ } \ -+ } while (0) - #else --# define RAWLOG(l, ...) {} /* disabled */ --# define DEBUGLOG(l, ...) {} /* disabled */ -+# define RAWLOG(l, ...) do { } while (0) /* disabled */ -+# define DEBUGLOG(l, ...) do { } while (0) /* disabled */ - #endif - -- -- - #endif /* DEBUG_H_12987983217 */ ---- a/lib/zstd/common/entropy_common.c -+++ b/lib/zstd/common/entropy_common.c -@@ -1,6 +1,7 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* ****************************************************************** - * Common functions of New Generation Entropy library -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. 
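
The DEBUGLOG rewrite above also bakes the source line into the message at preprocessing time. Because __LINE__ is an integer token, it has to pass through two levels of macro expansion before # can stringify its value; a standalone illustration of the same STRINGIFY/TOSTRING/LINE_AS_STRING pattern (only the macro names are taken from the patch):

    #include <stdio.h>

    #define STRINGIFY(x) #x
    #define TOSTRING(x)  STRINGIFY(x)     /* extra level so __LINE__ expands first */
    #define LINE_AS_STRING TOSTRING(__LINE__)

    int main(void)
    {
        /* Adjacent string literals are concatenated at compile time, so the
         * file/line prefix costs nothing at run time. */
        printf(__FILE__ ":" LINE_AS_STRING ": hello\n");
        /* STRINGIFY(__LINE__) on its own would print the literal text "__LINE__". */
        return 0;
    }
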
- * - * You can contact the author at : - * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy -@@ -19,8 +20,8 @@ - #include "error_private.h" /* ERR_*, ERROR */ - #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ - #include "fse.h" --#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ - #include "huf.h" -+#include "bits.h" /* ZSDT_highbit32, ZSTD_countTrailingZeros32 */ - - - /*=== Version ===*/ -@@ -38,23 +39,6 @@ const char* HUF_getErrorName(size_t code - /*-************************************************************** - * FSE NCount encoding-decoding - ****************************************************************/ --static U32 FSE_ctz(U32 val) --{ -- assert(val != 0); -- { --# if (__GNUC__ >= 3) /* GCC Intrinsic */ -- return __builtin_ctz(val); --# else /* Software version */ -- U32 count = 0; -- while ((val & 1) == 0) { -- val >>= 1; -- ++count; -- } -- return count; --# endif -- } --} -- - FORCE_INLINE_TEMPLATE - size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, - const void* headerBuffer, size_t hbSize) -@@ -102,7 +86,7 @@ size_t FSE_readNCount_body(short* normal - * repeat. - * Avoid UB by setting the high bit to 1. - */ -- int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; -+ int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1; - while (repeats >= 12) { - charnum += 3 * 12; - if (LIKELY(ip <= iend-7)) { -@@ -113,7 +97,7 @@ size_t FSE_readNCount_body(short* normal - ip = iend - 4; - } - bitStream = MEM_readLE32(ip) >> bitCount; -- repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; -+ repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1; - } - charnum += 3 * repeats; - bitStream >>= 2 * repeats; -@@ -178,7 +162,7 @@ size_t FSE_readNCount_body(short* normal - * know that threshold > 1. 
- */ - if (remaining <= 1) break; -- nbBits = BIT_highbit32(remaining) + 1; -+ nbBits = ZSTD_highbit32(remaining) + 1; - threshold = 1 << (nbBits - 1); - } - if (charnum >= maxSV1) break; -@@ -253,7 +237,7 @@ size_t HUF_readStats(BYTE* huffWeight, s - const void* src, size_t srcSize) - { - U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; -- return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0); -+ return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* flags */ 0); - } - - FORCE_INLINE_TEMPLATE size_t -@@ -301,14 +285,14 @@ HUF_readStats_body(BYTE* huffWeight, siz - if (weightTotal == 0) return ERROR(corruption_detected); - - /* get last non-null symbol weight (implied, total must be 2^n) */ -- { U32 const tableLog = BIT_highbit32(weightTotal) + 1; -+ { U32 const tableLog = ZSTD_highbit32(weightTotal) + 1; - if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); - *tableLogPtr = tableLog; - /* determine last weight */ - { U32 const total = 1 << tableLog; - U32 const rest = total - weightTotal; -- U32 const verif = 1 << BIT_highbit32(rest); -- U32 const lastWeight = BIT_highbit32(rest) + 1; -+ U32 const verif = 1 << ZSTD_highbit32(rest); -+ U32 const lastWeight = ZSTD_highbit32(rest) + 1; - if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ - huffWeight[oSize] = (BYTE)lastWeight; - rankStats[lastWeight]++; -@@ -345,13 +329,13 @@ size_t HUF_readStats_wksp(BYTE* huffWeig - U32* nbSymbolsPtr, U32* tableLogPtr, - const void* src, size_t srcSize, - void* workSpace, size_t wkspSize, -- int bmi2) -+ int flags) - { - #if DYNAMIC_BMI2 -- if (bmi2) { -+ if (flags & HUF_flags_bmi2) { - return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); - } - #endif -- (void)bmi2; -+ (void)flags; - return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); - } ---- a/lib/zstd/common/error_private.c -+++ b/lib/zstd/common/error_private.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -27,9 +28,11 @@ const char* ERR_getErrorString(ERR_enum - case PREFIX(version_unsupported): return "Version not supported"; - case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; - case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; -- case PREFIX(corruption_detected): return "Corrupted block detected"; -+ case PREFIX(corruption_detected): return "Data corruption detected"; - case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; -+ case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification"; - case PREFIX(parameter_unsupported): return "Unsupported parameter"; -+ case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters"; - case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; - case PREFIX(init_missing): return "Context should be init first"; - case PREFIX(memory_allocation): return "Allocation error : not enough memory"; -@@ -38,17 +41,23 @@ const char* ERR_getErrorString(ERR_enum - case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; - case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; - case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; -+ case PREFIX(cannotProduce_uncompressedBlock): return "This mode cannot generate an uncompressed block"; -+ case PREFIX(stabilityCondition_notRespected): return "pledged buffer stability condition is not respected"; - case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; - case PREFIX(dictionary_wrong): return "Dictionary mismatch"; - case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; - case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; - case PREFIX(srcSize_wrong): return "Src size is incorrect"; - case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; -+ case PREFIX(noForwardProgress_destFull): return "Operation made no progress over multiple calls, due to output buffer being full"; -+ case PREFIX(noForwardProgress_inputEmpty): return "Operation made no progress over multiple calls, due to input being empty"; - /* following error codes are not stable and may be removed or changed in a future version */ - case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; - case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; - case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; - case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; -+ case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code"; -+ case PREFIX(externalSequences_invalid): return "External sequences are not valid"; - case PREFIX(maxCode): - default: return notErrorCode; - } ---- a/lib/zstd/common/error_private.h -+++ b/lib/zstd/common/error_private.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -13,8 +14,6 @@ - #ifndef ERROR_H_MODULE - #define ERROR_H_MODULE - -- -- - /* **************************************** - * Dependencies - ******************************************/ -@@ -23,7 +22,6 @@ - #include "debug.h" - #include "zstd_deps.h" /* size_t */ - -- - /* **************************************** - * Compiler-specific - ******************************************/ -@@ -49,8 +47,13 @@ ERR_STATIC unsigned ERR_isError(size_t c - ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } - - /* check and forward error code */ --#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e --#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } -+#define CHECK_V_F(e, f) \ -+ size_t const e = f; \ -+ do { \ -+ if (ERR_isError(e)) \ -+ return e; \ -+ } while (0) -+#define CHECK_F(f) do { CHECK_V_F(_var_err__, f); } while (0) - - - /*-**************************************** -@@ -84,10 +87,12 @@ void _force_has_format_string(const char - * We want to force this function invocation to be syntactically correct, but - * we don't want to force runtime evaluation of its arguments. - */ --#define _FORCE_HAS_FORMAT_STRING(...) \ -- if (0) { \ -- _force_has_format_string(__VA_ARGS__); \ -- } -+#define _FORCE_HAS_FORMAT_STRING(...) \ -+ do { \ -+ if (0) { \ -+ _force_has_format_string(__VA_ARGS__); \ -+ } \ -+ } while (0) - - #define ERR_QUOTE(str) #str - -@@ -98,48 +103,49 @@ void _force_has_format_string(const char - * In order to do that (particularly, printing the conditional that failed), - * this can't just wrap RETURN_ERROR(). - */ --#define RETURN_ERROR_IF(cond, err, ...) \ -- if (cond) { \ -- RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ -- __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \ -- _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ -- RAWLOG(3, ": " __VA_ARGS__); \ -- RAWLOG(3, "\n"); \ -- return ERROR(err); \ -- } -+#define RETURN_ERROR_IF(cond, err, ...) \ -+ do { \ -+ if (cond) { \ -+ RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ -+ __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \ -+ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ -+ RAWLOG(3, ": " __VA_ARGS__); \ -+ RAWLOG(3, "\n"); \ -+ return ERROR(err); \ -+ } \ -+ } while (0) - - /* - * Unconditionally return the specified error. - * - * In debug modes, prints additional information. - */ --#define RETURN_ERROR(err, ...) \ -- do { \ -- RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ -- __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \ -- _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ -- RAWLOG(3, ": " __VA_ARGS__); \ -- RAWLOG(3, "\n"); \ -- return ERROR(err); \ -- } while(0); -+#define RETURN_ERROR(err, ...) \ -+ do { \ -+ RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ -+ __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \ -+ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ -+ RAWLOG(3, ": " __VA_ARGS__); \ -+ RAWLOG(3, "\n"); \ -+ return ERROR(err); \ -+ } while(0) - - /* - * If the provided expression evaluates to an error code, returns that error code. - * - * In debug modes, prints additional information. - */ --#define FORWARD_IF_ERROR(err, ...) 
\ -- do { \ -- size_t const err_code = (err); \ -- if (ERR_isError(err_code)) { \ -- RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ -- __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \ -- _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ -- RAWLOG(3, ": " __VA_ARGS__); \ -- RAWLOG(3, "\n"); \ -- return err_code; \ -- } \ -- } while(0); -- -+#define FORWARD_IF_ERROR(err, ...) \ -+ do { \ -+ size_t const err_code = (err); \ -+ if (ERR_isError(err_code)) { \ -+ RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ -+ __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \ -+ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ -+ RAWLOG(3, ": " __VA_ARGS__); \ -+ RAWLOG(3, "\n"); \ -+ return err_code; \ -+ } \ -+ } while(0) - - #endif /* ERROR_H_MODULE */ ---- a/lib/zstd/common/fse.h -+++ b/lib/zstd/common/fse.h -@@ -1,7 +1,8 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* ****************************************************************** - * FSE : Finite State Entropy codec - * Public Prototypes declaration -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy -@@ -11,8 +12,6 @@ - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - ****************************************************************** */ -- -- - #ifndef FSE_H - #define FSE_H - -@@ -22,7 +21,6 @@ - ******************************************/ - #include "zstd_deps.h" /* size_t, ptrdiff_t */ - -- - /*-***************************************** - * FSE_PUBLIC_API : control library symbols visibility - ******************************************/ -@@ -50,34 +48,6 @@ - FSE_PUBLIC_API unsigned FSE_versionNumber(void); /*< library version number; to be used when checking dll version */ - - --/*-**************************************** --* FSE simple functions --******************************************/ --/*! FSE_compress() : -- Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. -- 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). -- @return : size of compressed data (<= dstCapacity). -- Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! -- if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. -- if FSE_isError(return), compression failed (more details using FSE_getErrorName()) --*/ --FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, -- const void* src, size_t srcSize); -- --/*! FSE_decompress(): -- Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', -- into already allocated destination buffer 'dst', of size 'dstCapacity'. -- @return : size of regenerated data (<= maxDstSize), -- or an error code, which can be tested using FSE_isError() . -- -- ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! -- Why ? : making this distinction requires a header. -- Header management is intentionally delegated to the user layer, which can better manage special cases. 
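
Beyond the do/while(0) rewrap, the error_private.h hunk above also drops the trailing ';' that used to follow while(0) in RETURN_ERROR and FORWARD_IF_ERROR, which previously made them expand to two statements. A hedged sketch of the call pattern this enables inside zstd internals (the function itself and its checks are illustrative, not from the patch):

    /* Each macro now expands to exactly one statement, so it can sit in an
     * if/else without extra braces. */
    static size_t copy_checked(void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize)
    {
        RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall,
                        "need %u bytes of room", (unsigned)srcSize);
        if (dst == NULL)
            RETURN_ERROR(dstBuffer_null, "destination buffer is NULL");
        else
            ZSTD_memcpy(dst, src, srcSize);
        return srcSize;
    }
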
--*/ --FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, -- const void* cSrc, size_t cSrcSize); -- -- - /*-***************************************** - * Tool functions - ******************************************/ -@@ -89,20 +59,6 @@ FSE_PUBLIC_API const char* FSE_getErrorN - - - /*-***************************************** --* FSE advanced functions --******************************************/ --/*! FSE_compress2() : -- Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' -- Both parameters can be defined as '0' to mean : use default value -- @return : size of compressed data -- Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! -- if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. -- if FSE_isError(return), it's an error code. --*/ --FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); -- -- --/*-***************************************** - * FSE detailed API - ******************************************/ - /*! -@@ -161,8 +117,6 @@ FSE_PUBLIC_API size_t FSE_writeNCount (v - /*! Constructor and Destructor of FSE_CTable. - Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ - typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ --FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); --FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); - - /*! FSE_buildCTable(): - Builds `ct`, which must be already allocated, using FSE_createCTable(). -@@ -238,23 +192,7 @@ FSE_PUBLIC_API size_t FSE_readNCount_bmi - unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, - const void* rBuffer, size_t rBuffSize, int bmi2); - --/*! Constructor and Destructor of FSE_DTable. -- Note that its size depends on 'tableLog' */ - typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ --FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); --FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); -- --/*! FSE_buildDTable(): -- Builds 'dt', which must be already allocated, using FSE_createDTable(). -- return : 0, or an errorCode, which can be tested using FSE_isError() */ --FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); -- --/*! FSE_decompress_usingDTable(): -- Decompress compressed source `cSrc` of size `cSrcSize` using `dt` -- into `dst` which must be already allocated. -- @return : size of regenerated data (necessarily <= `dstCapacity`), -- or an errorCode, which can be tested using FSE_isError() */ --FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); - - /*! 
- Tutorial : -@@ -286,13 +224,11 @@ If there is an error, the function will - - #endif /* FSE_H */ - -+ - #if !defined(FSE_H_FSE_STATIC_LINKING_ONLY) - #define FSE_H_FSE_STATIC_LINKING_ONLY -- --/* *** Dependency *** */ - #include "bitstream.h" - -- - /* ***************************************** - * Static allocation - *******************************************/ -@@ -317,16 +253,6 @@ If there is an error, the function will - unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); - /*< same as FSE_optimalTableLog(), which used `minus==2` */ - --/* FSE_compress_wksp() : -- * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). -- * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable. -- */ --#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) --size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); -- --size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); --/*< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ -- - size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); - /*< build a fake FSE_CTable, designed to compress always the same symbolValue */ - -@@ -344,19 +270,11 @@ size_t FSE_buildCTable_wksp(FSE_CTable* - FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); - /*< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ - --size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); --/*< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */ -- --size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); --/*< build a fake FSE_DTable, designed to always generate the same symbolValue */ -- --#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) -+#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) - #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) --size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); --/*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ -- - size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2); --/*< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */ -+/*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`. 
-+ * Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */ - - typedef enum { - FSE_repeat_none, /*< Cannot use the previous table */ -@@ -539,20 +457,20 @@ MEM_STATIC void FSE_encodeSymbol(BIT_CSt - FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; - const U16* const stateTable = (const U16*)(statePtr->stateTable); - U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); -- BIT_addBits(bitC, statePtr->value, nbBitsOut); -+ BIT_addBits(bitC, (BitContainerType)statePtr->value, nbBitsOut); - statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; - } - - MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) - { -- BIT_addBits(bitC, statePtr->value, statePtr->stateLog); -+ BIT_addBits(bitC, (BitContainerType)statePtr->value, statePtr->stateLog); - BIT_flushBits(bitC); - } - - - /* FSE_getMaxNbBits() : - * Approximate maximum cost of a symbol, in bits. -- * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) -+ * Fractional get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) - * note 1 : assume symbolValue is valid (<= maxSymbolValue) - * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ - MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) -@@ -705,7 +623,4 @@ MEM_STATIC unsigned FSE_endOfDState(cons - - #define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3) - -- - #endif /* FSE_STATIC_LINKING_ONLY */ -- -- ---- a/lib/zstd/common/fse_decompress.c -+++ b/lib/zstd/common/fse_decompress.c -@@ -1,6 +1,7 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* ****************************************************************** - * FSE : Finite State Entropy decoder -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. 
- * - * You can contact the author at : - * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy -@@ -22,8 +23,8 @@ - #define FSE_STATIC_LINKING_ONLY - #include "fse.h" - #include "error_private.h" --#define ZSTD_DEPS_NEED_MALLOC --#include "zstd_deps.h" -+#include "zstd_deps.h" /* ZSTD_memcpy */ -+#include "bits.h" /* ZSTD_highbit32 */ - - - /* ************************************************************** -@@ -55,19 +56,6 @@ - #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) - #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) - -- --/* Function templates */ --FSE_DTable* FSE_createDTable (unsigned tableLog) --{ -- if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; -- return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); --} -- --void FSE_freeDTable (FSE_DTable* dt) --{ -- ZSTD_free(dt); --} -- - static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) - { - void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ -@@ -96,7 +84,7 @@ static size_t FSE_buildDTable_internal(F - symbolNext[s] = 1; - } else { - if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; -- symbolNext[s] = normalizedCounter[s]; -+ symbolNext[s] = (U16)normalizedCounter[s]; - } } } - ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); - } -@@ -111,8 +99,7 @@ static size_t FSE_buildDTable_internal(F - * all symbols have counts <= 8. We ensure we have 8 bytes at the end of - * our buffer to handle the over-write. - */ -- { -- U64 const add = 0x0101010101010101ull; -+ { U64 const add = 0x0101010101010101ull; - size_t pos = 0; - U64 sv = 0; - U32 s; -@@ -123,14 +110,13 @@ static size_t FSE_buildDTable_internal(F - for (i = 8; i < n; i += 8) { - MEM_write64(spread + pos + i, sv); - } -- pos += n; -- } -- } -+ pos += (size_t)n; -+ } } - /* Now we spread those positions across the table. -- * The benefit of doing it in two stages is that we avoid the the -+ * The benefit of doing it in two stages is that we avoid the - * variable size inner loop, which caused lots of branch misses. - * Now we can run through all the positions without any branch misses. -- * We unroll the loop twice, since that is what emperically worked best. -+ * We unroll the loop twice, since that is what empirically worked best. 
- */ - { - size_t position = 0; -@@ -166,7 +152,7 @@ static size_t FSE_buildDTable_internal(F - for (u=0; utableLog = 0; -- DTableH->fastMode = 0; -- -- cell->newState = 0; -- cell->symbol = symbolValue; -- cell->nbBits = 0; -- -- return 0; --} -- -- --size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) --{ -- void* ptr = dt; -- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; -- void* dPtr = dt + 1; -- FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; -- const unsigned tableSize = 1 << nbBits; -- const unsigned tableMask = tableSize - 1; -- const unsigned maxSV1 = tableMask+1; -- unsigned s; -- -- /* Sanity checks */ -- if (nbBits < 1) return ERROR(GENERIC); /* min size */ -- -- /* Build Decoding Table */ -- DTableH->tableLog = (U16)nbBits; -- DTableH->fastMode = 1; -- for (s=0; sfastMode; -- -- /* select fast mode (static) */ -- if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); -- return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); --} -- -- --size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) --{ -- return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0); -+ assert(op >= ostart); -+ return (size_t)(op-ostart); - } - - typedef struct { - short ncount[FSE_MAX_SYMBOL_VALUE + 1]; -- FSE_DTable dtable[]; /* Dynamically sized */ - } FSE_DecompressWksp; - - -@@ -327,13 +252,18 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompr - unsigned tableLog; - unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; - FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace; -+ size_t const dtablePos = sizeof(FSE_DecompressWksp) / sizeof(FSE_DTable); -+ FSE_DTable* const dtable = (FSE_DTable*)workSpace + dtablePos; - -- DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0); -+ FSE_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0); - if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC); - -+ /* correct offset to dtable depends on this property */ -+ FSE_STATIC_ASSERT(sizeof(FSE_DecompressWksp) % sizeof(FSE_DTable) == 0); -+ - /* normal FSE decoding mode */ -- { -- size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); -+ { size_t const NCountLength = -+ FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); - if (FSE_isError(NCountLength)) return NCountLength; - if (tableLog > maxLog) return ERROR(tableLog_tooLarge); - assert(NCountLength <= cSrcSize); -@@ -342,19 +272,20 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompr - } - - if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); -- workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog); -+ assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize); -+ workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); - wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); - -- CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); -+ CHECK_F( FSE_buildDTable_internal(dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); - - { -- const void* ptr = wksp->dtable; -+ const void* ptr = dtable; - const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; - const U32 fastMode = DTableH->fastMode; - - /* select fast mode (static) */ -- if (fastMode) return FSE_decompress_usingDTable_generic(dst, 
dstCapacity, ip, cSrcSize, wksp->dtable, 1); -- return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0); -+ if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1); -+ return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0); - } - } - -@@ -382,9 +313,4 @@ size_t FSE_decompress_wksp_bmi2(void* ds - return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); - } - -- --typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; -- -- -- - #endif /* FSE_COMMONDEFS_ONLY */ ---- a/lib/zstd/common/huf.h -+++ b/lib/zstd/common/huf.h -@@ -1,7 +1,8 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* ****************************************************************** - * huff0 huffman codec, - * part of Finite State Entropy library -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy -@@ -12,105 +13,26 @@ - * You may select, at your option, one of the above-listed licenses. - ****************************************************************** */ - -- - #ifndef HUF_H_298734234 - #define HUF_H_298734234 - - /* *** Dependencies *** */ - #include "zstd_deps.h" /* size_t */ -- -- --/* *** library symbols visibility *** */ --/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, -- * HUF symbols remain "private" (internal symbols for library only). -- * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ --#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) --# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) --#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ --# define HUF_PUBLIC_API __declspec(dllexport) --#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) --# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ --#else --# define HUF_PUBLIC_API --#endif -- -- --/* ========================== */ --/* *** simple functions *** */ --/* ========================== */ -- --/* HUF_compress() : -- * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. -- * 'dst' buffer must be already allocated. -- * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). -- * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. -- * @return : size of compressed data (<= `dstCapacity`). -- * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! -- * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) -- */ --HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, -- const void* src, size_t srcSize); -- --/* HUF_decompress() : -- * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', -- * into already allocated buffer 'dst', of minimum size 'dstSize'. -- * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. -- * Note : in contrast with FSE, HUF_decompress can regenerate -- * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, -- * because it knows size to regenerate (originalSize). 
-- * @return : size of regenerated data (== originalSize), -- * or an error code, which can be tested using HUF_isError() -- */ --HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, -- const void* cSrc, size_t cSrcSize); -- -+#include "mem.h" /* U32 */ -+#define FSE_STATIC_LINKING_ONLY -+#include "fse.h" - - /* *** Tool functions *** */ --#define HUF_BLOCKSIZE_MAX (128 * 1024) /*< maximum input size for a single block compressed with HUF_compress */ --HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /*< maximum compressed size (worst case) */ -+#define HUF_BLOCKSIZE_MAX (128 * 1024) /*< maximum input size for a single block compressed with HUF_compress */ -+size_t HUF_compressBound(size_t size); /*< maximum compressed size (worst case) */ - - /* Error Management */ --HUF_PUBLIC_API unsigned HUF_isError(size_t code); /*< tells if a return value is an error code */ --HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /*< provides error code string (useful for debugging) */ -- -+unsigned HUF_isError(size_t code); /*< tells if a return value is an error code */ -+const char* HUF_getErrorName(size_t code); /*< provides error code string (useful for debugging) */ - --/* *** Advanced function *** */ - --/* HUF_compress2() : -- * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. -- * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . -- * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ --HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, -- const void* src, size_t srcSize, -- unsigned maxSymbolValue, unsigned tableLog); -- --/* HUF_compress4X_wksp() : -- * Same as HUF_compress2(), but uses externally allocated `workSpace`. -- * `workspace` must be at least as large as HUF_WORKSPACE_SIZE */ - #define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */) - #define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64)) --HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, -- const void* src, size_t srcSize, -- unsigned maxSymbolValue, unsigned tableLog, -- void* workSpace, size_t wkspSize); -- --#endif /* HUF_H_298734234 */ -- --/* ****************************************************************** -- * WARNING !! -- * The following section contains advanced and experimental definitions -- * which shall never be used in the context of a dynamic library, -- * because they are not guaranteed to remain stable in the future. -- * Only consider them in association with static linking. 
-- * *****************************************************************/ --#if !defined(HUF_H_HUF_STATIC_LINKING_ONLY) --#define HUF_H_HUF_STATIC_LINKING_ONLY -- --/* *** Dependencies *** */ --#include "mem.h" /* U32 */ --#define FSE_STATIC_LINKING_ONLY --#include "fse.h" -- - - /* *** Constants *** */ - #define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */ -@@ -151,25 +73,49 @@ typedef U32 HUF_DTable; - /* **************************************** - * Advanced decompression functions - ******************************************/ --size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */ --#ifndef HUF_FORCE_DECOMPRESS_X1 --size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */ --#endif - --size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< decodes RLE and uncompressed */ --size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< considers RLE and uncompressed as errors */ --size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< considers RLE and uncompressed as errors */ --size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */ --size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< single-symbol decoder */ --#ifndef HUF_FORCE_DECOMPRESS_X1 --size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */ --size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< double-symbols decoder */ --#endif -+/* -+ * Huffman flags bitset. -+ * For all flags, 0 is the default value. -+ */ -+typedef enum { -+ /* -+ * If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime. -+ * Otherwise: Ignored. -+ */ -+ HUF_flags_bmi2 = (1 << 0), -+ /* -+ * If set: Test possible table depths to find the one that produces the smallest header + encoded size. -+ * If unset: Use heuristic to find the table depth. -+ */ -+ HUF_flags_optimalDepth = (1 << 1), -+ /* -+ * If set: If the previous table can encode the input, always reuse the previous table. -+ * If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output. -+ */ -+ HUF_flags_preferRepeat = (1 << 2), -+ /* -+ * If set: Sample the input and check if the sample is uncompressible, if it is then don't attempt to compress. -+ * If unset: Always histogram the entire input. -+ */ -+ HUF_flags_suspectUncompressible = (1 << 3), -+ /* -+ * If set: Don't use assembly implementations -+ * If unset: Allow using assembly implementations -+ */ -+ HUF_flags_disableAsm = (1 << 4), -+ /* -+ * If set: Don't use the fast decoding loop, always use the fallback decoding loop. -+ * If unset: Use the fast decoding loop when possible. 
-+ */ -+ HUF_flags_disableFast = (1 << 5) -+} HUF_flags_e; - - - /* **************************************** - * HUF detailed API - * ****************************************/ -+#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra - - /*! HUF_compress() does the following: - * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") -@@ -182,12 +128,12 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_D - * For example, it's possible to compress several blocks using the same 'CTable', - * or to save and regenerate 'CTable' using external methods. - */ --unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); --size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ --size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); -+unsigned HUF_minTableLog(unsigned symbolCardinality); -+unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue); -+unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace, -+ size_t wkspSize, HUF_CElt* table, const unsigned* count, int flags); /* table is used as scratch space for building and testing tables, not a return value */ - size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); --size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); --size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2); -+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags); - size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); - int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); - -@@ -196,6 +142,7 @@ typedef enum { - HUF_repeat_check, /*< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ - HUF_repeat_valid /*< Can use the previous table and it is assumed to be valid */ - } HUF_repeat; -+ - /* HUF_compress4X_repeat() : - * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. - * If it uses hufTable it does not modify hufTable or repeat. -@@ -206,13 +153,13 @@ size_t HUF_compress4X_repeat(void* dst, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned tableLog, - void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ -- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible); -+ HUF_CElt* hufTable, HUF_repeat* repeat, int flags); - - /* HUF_buildCTable_wksp() : - * Same as HUF_buildCTable(), but using externally allocated scratch buffer. - * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. 
- */ --#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) -+#define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192) - #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) - size_t HUF_buildCTable_wksp (HUF_CElt* tree, - const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, -@@ -238,7 +185,7 @@ size_t HUF_readStats_wksp(BYTE* huffWeig - U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, - const void* src, size_t srcSize, - void* workspace, size_t wkspSize, -- int bmi2); -+ int flags); - - /* HUF_readCTable() : - * Loading a CTable saved with HUF_writeCTable() */ -@@ -246,9 +193,22 @@ size_t HUF_readCTable (HUF_CElt* CTable, - - /* HUF_getNbBitsFromCTable() : - * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX -- * Note 1 : is not inlined, as HUF_CElt definition is private */ -+ * Note 1 : If symbolValue > HUF_readCTableHeader(symbolTable).maxSymbolValue, returns 0 -+ * Note 2 : is not inlined, as HUF_CElt definition is private -+ */ - U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue); - -+typedef struct { -+ BYTE tableLog; -+ BYTE maxSymbolValue; -+ BYTE unused[sizeof(size_t) - 2]; -+} HUF_CTableHeader; -+ -+/* HUF_readCTableHeader() : -+ * @returns The header from the CTable specifying the tableLog and the maxSymbolValue. -+ */ -+HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable); -+ - /* - * HUF_decompress() does the following: - * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics -@@ -276,32 +236,12 @@ U32 HUF_selectDecoder (size_t dstSize, s - #define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9)) - #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) - --#ifndef HUF_FORCE_DECOMPRESS_X2 --size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); --size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); --#endif --#ifndef HUF_FORCE_DECOMPRESS_X1 --size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); --size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); --#endif -- --size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); --#ifndef HUF_FORCE_DECOMPRESS_X2 --size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); --#endif --#ifndef HUF_FORCE_DECOMPRESS_X1 --size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); --#endif -- - - /* ====================== */ - /* single stream variants */ - /* ====================== */ - --size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); --size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */ --size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); --size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2); -+size_t HUF_compress1X_usingCTable(void* 
dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags); - /* HUF_compress1X_repeat() : - * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. - * If it uses hufTable it does not modify hufTable or repeat. -@@ -312,47 +252,27 @@ size_t HUF_compress1X_repeat(void* dst, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned tableLog, - void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ -- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible); -- --size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ --#ifndef HUF_FORCE_DECOMPRESS_X1 --size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ --#endif -- --size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); --size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); --#ifndef HUF_FORCE_DECOMPRESS_X2 --size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */ --size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< single-symbol decoder */ --#endif --#ifndef HUF_FORCE_DECOMPRESS_X1 --size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */ --size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< double-symbols decoder */ --#endif -+ HUF_CElt* hufTable, HUF_repeat* repeat, int flags); - --size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /*< automatic selection of sing or double symbol decoder, based on DTable */ --#ifndef HUF_FORCE_DECOMPRESS_X2 --size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); --#endif -+size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); - #ifndef HUF_FORCE_DECOMPRESS_X1 --size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); /*< double-symbols decoder */ - #endif - - /* BMI2 variants. - * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 
- */ --size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); -+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags); - #ifndef HUF_FORCE_DECOMPRESS_X2 --size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); -+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); - #endif --size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); --size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); -+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags); -+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); - #ifndef HUF_FORCE_DECOMPRESS_X2 --size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); -+size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags); - #endif - #ifndef HUF_FORCE_DECOMPRESS_X1 --size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); -+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags); - #endif - --#endif /* HUF_STATIC_LINKING_ONLY */ -- -+#endif /* HUF_H_298734234 */ ---- a/lib/zstd/common/mem.h -+++ b/lib/zstd/common/mem.h -@@ -1,6 +1,6 @@ - /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -24,6 +24,7 @@ - /*-**************************************** - * Compiler specifics - ******************************************/ -+#undef MEM_STATIC /* may be already defined from common/compiler.h */ - #define MEM_STATIC static inline - - /*-************************************************************** ---- a/lib/zstd/common/portability_macros.h -+++ b/lib/zstd/common/portability_macros.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -12,7 +13,7 @@ - #define ZSTD_PORTABILITY_MACROS_H - - /* -- * This header file contains macro defintions to support portability. -+ * This header file contains macro definitions to support portability. - * This header is shared between C and ASM code, so it MUST only - * contain macro definitions. It MUST not contain any C code. 
- * -@@ -45,30 +46,35 @@ - /* Mark the internal assembly functions as hidden */ - #ifdef __ELF__ - # define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func -+#elif defined(__APPLE__) -+# define ZSTD_HIDE_ASM_FUNCTION(func) .private_extern func - #else - # define ZSTD_HIDE_ASM_FUNCTION(func) - #endif - -+/* Compile time determination of BMI2 support */ -+ -+ - /* Enable runtime BMI2 dispatch based on the CPU. - * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. - */ - #ifndef DYNAMIC_BMI2 -- #if ((defined(__clang__) && __has_attribute(__target__)) \ -+# if ((defined(__clang__) && __has_attribute(__target__)) \ - || (defined(__GNUC__) \ - && (__GNUC__ >= 11))) \ -- && (defined(__x86_64__) || defined(_M_X64)) \ -+ && (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)) \ - && !defined(__BMI2__) -- # define DYNAMIC_BMI2 1 -- #else -- # define DYNAMIC_BMI2 0 -- #endif -+# define DYNAMIC_BMI2 1 -+# else -+# define DYNAMIC_BMI2 0 -+# endif - #endif - - /* -- * Only enable assembly for GNUC comptabile compilers, -+ * Only enable assembly for GNU C compatible compilers, - * because other platforms may not support GAS assembly syntax. - * -- * Only enable assembly for Linux / MacOS, other platforms may -+ * Only enable assembly for Linux / MacOS / Win32, other platforms may - * work, but they haven't been tested. This could likely be - * extended to BSD systems. - * -@@ -90,4 +96,23 @@ - */ - #define ZSTD_ENABLE_ASM_X86_64_BMI2 0 - -+/* -+ * For x86 ELF targets, add .note.gnu.property section for Intel CET in -+ * assembly sources when CET is enabled. -+ * -+ * Additionally, any function that may be called indirectly must begin -+ * with ZSTD_CET_ENDBRANCH. -+ */ -+#if defined(__ELF__) && (defined(__x86_64__) || defined(__i386__)) \ -+ && defined(__has_include) -+# if __has_include() -+# include -+# define ZSTD_CET_ENDBRANCH _CET_ENDBR -+# endif -+#endif -+ -+#ifndef ZSTD_CET_ENDBRANCH -+# define ZSTD_CET_ENDBRANCH -+#endif -+ - #endif /* ZSTD_PORTABILITY_MACROS_H */ ---- a/lib/zstd/common/zstd_common.c -+++ b/lib/zstd/common/zstd_common.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -14,7 +15,6 @@ - * Dependencies - ***************************************/ - #define ZSTD_DEPS_NEED_MALLOC --#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */ - #include "error_private.h" - #include "zstd_internal.h" - -@@ -47,37 +47,3 @@ ZSTD_ErrorCode ZSTD_getErrorCode(size_t - /*! 
ZSTD_getErrorString() : - * provides error code string from enum */ - const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } -- -- -- --/*=************************************************************** --* Custom allocator --****************************************************************/ --void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) --{ -- if (customMem.customAlloc) -- return customMem.customAlloc(customMem.opaque, size); -- return ZSTD_malloc(size); --} -- --void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) --{ -- if (customMem.customAlloc) { -- /* calloc implemented as malloc+memset; -- * not as efficient as calloc, but next best guess for custom malloc */ -- void* const ptr = customMem.customAlloc(customMem.opaque, size); -- ZSTD_memset(ptr, 0, size); -- return ptr; -- } -- return ZSTD_calloc(1, size); --} -- --void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) --{ -- if (ptr!=NULL) { -- if (customMem.customFree) -- customMem.customFree(customMem.opaque, ptr); -- else -- ZSTD_free(ptr); -- } --} ---- a/lib/zstd/common/zstd_deps.h -+++ b/lib/zstd/common/zstd_deps.h -@@ -1,6 +1,6 @@ - /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -105,3 +105,17 @@ static uint64_t ZSTD_div64(uint64_t divi - - #endif /* ZSTD_DEPS_IO */ - #endif /* ZSTD_DEPS_NEED_IO */ -+ -+/* -+ * Only requested when MSAN is enabled. -+ * Need: -+ * intptr_t -+ */ -+#ifdef ZSTD_DEPS_NEED_STDINT -+#ifndef ZSTD_DEPS_STDINT -+#define ZSTD_DEPS_STDINT -+ -+/* intptr_t already provided by ZSTD_DEPS_COMMON */ -+ -+#endif /* ZSTD_DEPS_STDINT */ -+#endif /* ZSTD_DEPS_NEED_STDINT */ ---- a/lib/zstd/common/zstd_internal.h -+++ b/lib/zstd/common/zstd_internal.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -28,12 +29,10 @@ - #include - #define FSE_STATIC_LINKING_ONLY - #include "fse.h" --#define HUF_STATIC_LINKING_ONLY - #include "huf.h" - #include /* XXH_reset, update, digest */ - #define ZSTD_TRACE 0 - -- - /* ---- static assert (debug) --- */ - #define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) - #define ZSTD_isError ERR_isError /* for inlining */ -@@ -83,16 +82,17 @@ typedef enum { bt_raw, bt_rle, bt_compre - #define ZSTD_FRAMECHECKSUMSIZE 4 - - #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ --#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ -+#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */ -+#define MIN_LITERALS_FOR_4_STREAMS 6 - --#define HufLog 12 --typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; -+typedef enum { set_basic, set_rle, set_compressed, set_repeat } SymbolEncodingType_e; - - #define LONGNBSEQ 0x7F00 - - #define MINMATCH 3 - - #define Litbits 8 -+#define LitHufLog 11 - #define MaxLit ((1<= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); -@@ -225,12 +227,6 @@ void ZSTD_wildcopy(void* dst, const void - * one COPY16() in the first call. Then, do two calls per loop since - * at that point it is more likely to have a high trip count. 
- */ --#ifdef __aarch64__ -- do { -- COPY16(op, ip); -- } -- while (op < oend); --#else - ZSTD_copy16(op, ip); - if (16 >= length) return; - op += 16; -@@ -240,7 +236,6 @@ void ZSTD_wildcopy(void* dst, const void - COPY16(op, ip); - } - while (op < oend); --#endif - } - } - -@@ -273,62 +268,6 @@ typedef enum { - /*-******************************************* - * Private declarations - *********************************************/ --typedef struct seqDef_s { -- U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */ -- U16 litLength; -- U16 mlBase; /* mlBase == matchLength - MINMATCH */ --} seqDef; -- --/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */ --typedef enum { -- ZSTD_llt_none = 0, /* no longLengthType */ -- ZSTD_llt_literalLength = 1, /* represents a long literal */ -- ZSTD_llt_matchLength = 2 /* represents a long match */ --} ZSTD_longLengthType_e; -- --typedef struct { -- seqDef* sequencesStart; -- seqDef* sequences; /* ptr to end of sequences */ -- BYTE* litStart; -- BYTE* lit; /* ptr to end of literals */ -- BYTE* llCode; -- BYTE* mlCode; -- BYTE* ofCode; -- size_t maxNbSeq; -- size_t maxNbLit; -- -- /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength -- * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment -- * the existing value of the litLength or matchLength by 0x10000. -- */ -- ZSTD_longLengthType_e longLengthType; -- U32 longLengthPos; /* Index of the sequence to apply long length modification to */ --} seqStore_t; -- --typedef struct { -- U32 litLength; -- U32 matchLength; --} ZSTD_sequenceLength; -- --/* -- * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences -- * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. -- */ --MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) --{ -- ZSTD_sequenceLength seqLen; -- seqLen.litLength = seq->litLength; -- seqLen.matchLength = seq->mlBase + MINMATCH; -- if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { -- if (seqStore->longLengthType == ZSTD_llt_literalLength) { -- seqLen.litLength += 0xFFFF; -- } -- if (seqStore->longLengthType == ZSTD_llt_matchLength) { -- seqLen.matchLength += 0xFFFF; -- } -- } -- return seqLen; --} - - /* - * Contains the compressed frame size and an upper-bound for the decompressed frame size. 
-@@ -337,74 +276,11 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getS - * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` - */ - typedef struct { -+ size_t nbBlocks; - size_t compressedSize; - unsigned long long decompressedBound; - } ZSTD_frameSizeInfo; /* decompress & legacy */ - --const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ --void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ -- --/* custom memory allocation functions */ --void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem); --void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem); --void ZSTD_customFree(void* ptr, ZSTD_customMem customMem); -- -- --MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ --{ -- assert(val != 0); -- { --# if (__GNUC__ >= 3) /* GCC Intrinsic */ -- return __builtin_clz (val) ^ 31; --# else /* Software version */ -- static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; -- U32 v = val; -- v |= v >> 1; -- v |= v >> 2; -- v |= v >> 4; -- v |= v >> 8; -- v |= v >> 16; -- return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; --# endif -- } --} -- --/* -- * Counts the number of trailing zeros of a `size_t`. -- * Most compilers should support CTZ as a builtin. A backup -- * implementation is provided if the builtin isn't supported, but -- * it may not be terribly efficient. -- */ --MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val) --{ -- if (MEM_64bits()) { --# if (__GNUC__ >= 4) -- return __builtin_ctzll((U64)val); --# else -- static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19, -- 4, 25, 14, 28, 9, 34, 20, 56, -- 5, 17, 26, 54, 15, 41, 29, 43, -- 10, 31, 38, 35, 21, 45, 49, 57, -- 63, 6, 12, 18, 24, 27, 33, 55, -- 16, 53, 40, 42, 30, 37, 44, 48, -- 62, 11, 23, 32, 52, 39, 36, 47, -- 61, 22, 51, 46, 60, 50, 59, 58 }; -- return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; --# endif -- } else { /* 32 bits */ --# if (__GNUC__ >= 3) -- return __builtin_ctz((U32)val); --# else -- static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3, -- 30, 22, 20, 15, 25, 17, 4, 8, -- 31, 27, 13, 23, 21, 19, 16, 7, -- 26, 12, 18, 6, 11, 5, 10, 9 }; -- return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; --# endif -- } --} -- -- - /* ZSTD_invalidateRepCodes() : - * ensures next compression will not use repcodes from previous block. - * Note : only works with regular variant; -@@ -420,13 +296,13 @@ typedef struct { - - /*! ZSTD_getcBlockSize() : - * Provides the size of compressed block from block header `src` */ --/* Used by: decompress, fullbench (does not get its definition from here) */ -+/* Used by: decompress, fullbench */ - size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, - blockProperties_t* bpPtr); - - /*! 
ZSTD_decodeSeqHeaders() : - * decode sequence header from src */ --/* Used by: decompress, fullbench (does not get its definition from here) */ -+/* Used by: zstd_decompress_block, fullbench */ - size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, - const void* src, size_t srcSize); - -@@ -439,5 +315,4 @@ MEM_STATIC int ZSTD_cpuSupportsBmi2(void - return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid); - } - -- - #endif /* ZSTD_CCOMMON_H_MODULE */ ---- a/lib/zstd/compress/clevels.h -+++ b/lib/zstd/compress/clevels.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the ---- a/lib/zstd/compress/fse_compress.c -+++ b/lib/zstd/compress/fse_compress.c -@@ -1,6 +1,7 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* ****************************************************************** - * FSE : Finite State Entropy encoder -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * You can contact the author at : - * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy -@@ -25,7 +26,8 @@ - #include "../common/error_private.h" - #define ZSTD_DEPS_NEED_MALLOC - #define ZSTD_DEPS_NEED_MATH64 --#include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */ -+#include "../common/zstd_deps.h" /* ZSTD_memset */ -+#include "../common/bits.h" /* ZSTD_highbit32 */ - - - /* ************************************************************** -@@ -90,7 +92,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* - assert(tableLog < 16); /* required for threshold strategy to work */ - - /* For explanations on how to distribute symbol values over the table : -- * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ -+ * https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ - - #ifdef __clang_analyzer__ - ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ -@@ -191,7 +193,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* - break; - default : - assert(normalizedCounter[s] > 1); -- { U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1); -+ { U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1); - U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut; - symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; - symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]); -@@ -224,8 +226,8 @@ size_t FSE_NCountWriteBound(unsigned max - size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog - + 4 /* bitCount initialized at 4 */ - + 2 /* first two symbols may use one additional bit each */) / 8) -- + 1 /* round up to whole nb bytes */ -- + 2 /* additional two bytes for bitstream flush */; -+ + 1 /* round up to whole nb bytes */ -+ + 2 /* additional two bytes for bitstream flush */; - return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? 
use default */ - } - -@@ -254,7 +256,7 @@ FSE_writeNCount_generic (void* header, s - /* Init */ - remaining = tableSize+1; /* +1 for extra accuracy */ - threshold = tableSize; -- nbBits = tableLog+1; -+ nbBits = (int)tableLog+1; - - while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ - if (previousIs0) { -@@ -273,7 +275,7 @@ FSE_writeNCount_generic (void* header, s - } - while (symbol >= start+3) { - start+=3; -- bitStream += 3 << bitCount; -+ bitStream += 3U << bitCount; - bitCount += 2; - } - bitStream += (symbol-start) << bitCount; -@@ -293,7 +295,7 @@ FSE_writeNCount_generic (void* header, s - count++; /* +1 for extra accuracy */ - if (count>=threshold) - count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ -- bitStream += count << bitCount; -+ bitStream += (U32)count << bitCount; - bitCount += nbBits; - bitCount -= (count>8); - out+= (bitCount+7) /8; - -- return (out-ostart); -+ assert(out >= ostart); -+ return (size_t)(out-ostart); - } - - -@@ -342,21 +345,11 @@ size_t FSE_writeNCount (void* buffer, si - * FSE Compression Code - ****************************************************************/ - --FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) --{ -- size_t size; -- if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; -- size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); -- return (FSE_CTable*)ZSTD_malloc(size); --} -- --void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); } -- - /* provides the minimum logSize to safely represent a distribution */ - static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) - { -- U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; -- U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; -+ U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1; -+ U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2; - U32 minBits = minBitsSrc < minBitsSymbols ? 
minBitsSrc : minBitsSymbols; - assert(srcSize > 1); /* Not supported, RLE should be used instead */ - return minBits; -@@ -364,7 +357,7 @@ static unsigned FSE_minTableLog(size_t s - - unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) - { -- U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; -+ U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus; - U32 tableLog = maxTableLog; - U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); - assert(srcSize > 1); /* Not supported, RLE should be used instead */ -@@ -532,40 +525,6 @@ size_t FSE_normalizeCount (short* normal - return tableLog; - } - -- --/* fake FSE_CTable, for raw (uncompressed) input */ --size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits) --{ -- const unsigned tableSize = 1 << nbBits; -- const unsigned tableMask = tableSize - 1; -- const unsigned maxSymbolValue = tableMask; -- void* const ptr = ct; -- U16* const tableU16 = ( (U16*) ptr) + 2; -- void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */ -- FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); -- unsigned s; -- -- /* Sanity checks */ -- if (nbBits < 1) return ERROR(GENERIC); /* min size */ -- -- /* header */ -- tableU16[-2] = (U16) nbBits; -- tableU16[-1] = (U16) maxSymbolValue; -- -- /* Build table */ -- for (s=0; s= 2 -+ -+static size_t showU32(const U32* arr, size_t size) - { -- return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); -+ size_t u; -+ for (u=0; u= sizeof(HUF_WriteCTableWksp)); -+ -+ assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue); -+ assert(HUF_readCTableHeader(CTable).tableLog == huffLog); -+ - /* check conditions */ - if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC); - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); -@@ -204,16 +286,6 @@ size_t HUF_writeCTable_wksp(void* dst, s - return ((maxSymbolValue+1)/2) + 1; - } - --/*! HUF_writeCTable() : -- `CTable` : Huffman tree to save, using huf representation. -- @return : size of saved CTable */ --size_t HUF_writeCTable (void* dst, size_t maxDstSize, -- const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) --{ -- HUF_WriteCTableWksp wksp; -- return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp)); --} -- - - size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights) - { -@@ -231,7 +303,9 @@ size_t HUF_readCTable (HUF_CElt* CTable, - if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); - if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); - -- CTable[0] = tableLog; -+ *maxSymbolValuePtr = nbSymbols - 1; -+ -+ HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr); - - /* Prepare base value per rank */ - { U32 n, nextRankStart = 0; -@@ -263,74 +337,71 @@ size_t HUF_readCTable (HUF_CElt* CTable, - { U32 n; for (n=0; n HUF_readCTableHeader(CTable).maxSymbolValue) -+ return 0; - return (U32)HUF_getNbBits(ct[symbolValue]); - } - - --typedef struct nodeElt_s { -- U32 count; -- U16 parent; -- BYTE byte; -- BYTE nbBits; --} nodeElt; -- - /* - * HUF_setMaxHeight(): -- * Enforces maxNbBits on the Huffman tree described in huffNode. -+ * Try to enforce @targetNbBits on the Huffman tree described in @huffNode. - * -- * It sets all nodes with nbBits > maxNbBits to be maxNbBits. 
Then it adjusts -- * the tree to so that it is a valid canonical Huffman tree. -+ * It attempts to convert all nodes with nbBits > @targetNbBits -+ * to employ @targetNbBits instead. Then it adjusts the tree -+ * so that it remains a valid canonical Huffman tree. - * - * @pre The sum of the ranks of each symbol == 2^largestBits, - * where largestBits == huffNode[lastNonNull].nbBits. - * @post The sum of the ranks of each symbol == 2^largestBits, -- * where largestBits is the return value <= maxNbBits. -+ * where largestBits is the return value (expected <= targetNbBits). - * -- * @param huffNode The Huffman tree modified in place to enforce maxNbBits. -+ * @param huffNode The Huffman tree modified in place to enforce targetNbBits. -+ * It's presumed sorted, from most frequent to rarest symbol. - * @param lastNonNull The symbol with the lowest count in the Huffman tree. -- * @param maxNbBits The maximum allowed number of bits, which the Huffman tree -+ * @param targetNbBits The allowed number of bits, which the Huffman tree - * may not respect. After this function the Huffman tree will -- * respect maxNbBits. -- * @return The maximum number of bits of the Huffman tree after adjustment, -- * necessarily no more than maxNbBits. -+ * respect targetNbBits. -+ * @return The maximum number of bits of the Huffman tree after adjustment. - */ --static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) -+static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits) - { - const U32 largestBits = huffNode[lastNonNull].nbBits; -- /* early exit : no elt > maxNbBits, so the tree is already valid. */ -- if (largestBits <= maxNbBits) return largestBits; -+ /* early exit : no elt > targetNbBits, so the tree is already valid. */ -+ if (largestBits <= targetNbBits) return largestBits; -+ -+ DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits); - - /* there are several too large elements (at least >= 2) */ - { int totalCost = 0; -- const U32 baseCost = 1 << (largestBits - maxNbBits); -+ const U32 baseCost = 1 << (largestBits - targetNbBits); - int n = (int)lastNonNull; - -- /* Adjust any ranks > maxNbBits to maxNbBits. -+ /* Adjust any ranks > targetNbBits to targetNbBits. - * Compute totalCost, which is how far the sum of the ranks is - * we are over 2^largestBits after adjust the offending ranks. 
- */ -- while (huffNode[n].nbBits > maxNbBits) { -+ while (huffNode[n].nbBits > targetNbBits) { - totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); -- huffNode[n].nbBits = (BYTE)maxNbBits; -+ huffNode[n].nbBits = (BYTE)targetNbBits; - n--; - } -- /* n stops at huffNode[n].nbBits <= maxNbBits */ -- assert(huffNode[n].nbBits <= maxNbBits); -- /* n end at index of smallest symbol using < maxNbBits */ -- while (huffNode[n].nbBits == maxNbBits) --n; -+ /* n stops at huffNode[n].nbBits <= targetNbBits */ -+ assert(huffNode[n].nbBits <= targetNbBits); -+ /* n end at index of smallest symbol using < targetNbBits */ -+ while (huffNode[n].nbBits == targetNbBits) --n; - -- /* renorm totalCost from 2^largestBits to 2^maxNbBits -+ /* renorm totalCost from 2^largestBits to 2^targetNbBits - * note : totalCost is necessarily a multiple of baseCost */ -- assert((totalCost & (baseCost - 1)) == 0); -- totalCost >>= (largestBits - maxNbBits); -+ assert(((U32)totalCost & (baseCost - 1)) == 0); -+ totalCost >>= (largestBits - targetNbBits); - assert(totalCost > 0); - - /* repay normalized cost */ -@@ -339,19 +410,19 @@ static U32 HUF_setMaxHeight(nodeElt* huf - - /* Get pos of last (smallest = lowest cum. count) symbol per rank */ - ZSTD_memset(rankLast, 0xF0, sizeof(rankLast)); -- { U32 currentNbBits = maxNbBits; -+ { U32 currentNbBits = targetNbBits; - int pos; - for (pos=n ; pos >= 0; pos--) { - if (huffNode[pos].nbBits >= currentNbBits) continue; -- currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ -- rankLast[maxNbBits-currentNbBits] = (U32)pos; -+ currentNbBits = huffNode[pos].nbBits; /* < targetNbBits */ -+ rankLast[targetNbBits-currentNbBits] = (U32)pos; - } } - - while (totalCost > 0) { - /* Try to reduce the next power of 2 above totalCost because we - * gain back half the rank. - */ -- U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; -+ U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1; - for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { - U32 const highPos = rankLast[nBitsToDecrease]; - U32 const lowPos = rankLast[nBitsToDecrease-1]; -@@ -391,7 +462,7 @@ static U32 HUF_setMaxHeight(nodeElt* huf - rankLast[nBitsToDecrease] = noSymbol; - else { - rankLast[nBitsToDecrease]--; -- if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) -+ if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease) - rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ - } - } /* while (totalCost > 0) */ -@@ -403,11 +474,11 @@ static U32 HUF_setMaxHeight(nodeElt* huf - * TODO. - */ - while (totalCost < 0) { /* Sometimes, cost correction overshoot */ -- /* special case : no rank 1 symbol (using maxNbBits-1); -- * let's create one from largest rank 0 (using maxNbBits). -+ /* special case : no rank 1 symbol (using targetNbBits-1); -+ * let's create one from largest rank 0 (using targetNbBits). 
- */ - if (rankLast[1] == noSymbol) { -- while (huffNode[n].nbBits == maxNbBits) n--; -+ while (huffNode[n].nbBits == targetNbBits) n--; - huffNode[n+1].nbBits--; - assert(n >= 0); - rankLast[1] = (U32)(n+1); -@@ -421,7 +492,7 @@ static U32 HUF_setMaxHeight(nodeElt* huf - } /* repay normalized cost */ - } /* there are several too large elements (at least >= 2) */ - -- return maxNbBits; -+ return targetNbBits; - } - - typedef struct { -@@ -429,7 +500,7 @@ typedef struct { - U16 curr; - } rankPos; - --typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; -+typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)]; - - /* Number of buckets available for HUF_sort() */ - #define RANK_POSITION_TABLE_SIZE 192 -@@ -448,8 +519,8 @@ typedef struct { - * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing. - */ - #define RANK_POSITION_MAX_COUNT_LOG 32 --#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */ --#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */ -+#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */) -+#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */) - - /* Return the appropriate bucket index for a given count. See definition of - * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy. -@@ -457,7 +528,7 @@ typedef struct { - static U32 HUF_getIndex(U32 const count) { - return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF) - ? count -- : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN; -+ : ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN; - } - - /* Helper swap function for HUF_quickSortPartition() */ -@@ -580,7 +651,7 @@ static void HUF_sort(nodeElt huffNode[], - - /* Sort each bucket. */ - for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) { -- U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base; -+ int const bucketSize = rankPosition[n].curr - rankPosition[n].base; - U32 const bucketStartIdx = rankPosition[n].base; - if (bucketSize > 1) { - assert(bucketStartIdx < maxSymbolValue1); -@@ -591,6 +662,7 @@ static void HUF_sort(nodeElt huffNode[], - assert(HUF_isSorted(huffNode, maxSymbolValue1)); - } - -+ - /* HUF_buildCTable_wksp() : - * Same as HUF_buildCTable(), but using externally allocated scratch buffer. - * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). 
-@@ -611,6 +683,7 @@ static int HUF_buildTree(nodeElt* huffNo - int lowS, lowN; - int nodeNb = STARTNODE; - int n, nodeRoot; -+ DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1); - /* init for parents */ - nonNullRank = (int)maxSymbolValue; - while(huffNode[nonNullRank].count == 0) nonNullRank--; -@@ -637,6 +710,8 @@ static int HUF_buildTree(nodeElt* huffNo - for (n=0; n<=nonNullRank; n++) - huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; - -+ DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1)); -+ - return nonNullRank; - } - -@@ -671,31 +746,40 @@ static void HUF_buildCTableFromTree(HUF_ - HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */ - for (n=0; nhuffNodeTbl; - nodeElt* const huffNode = huffNode0+1; - int nonNullRank; - -+ HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables)); -+ -+ DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1); -+ - /* safety checks */ - if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) -- return ERROR(workSpace_tooSmall); -+ return ERROR(workSpace_tooSmall); - if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) -- return ERROR(maxSymbolValue_tooLarge); -+ return ERROR(maxSymbolValue_tooLarge); - ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable)); - - /* sort, decreasing order */ - HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition); -+ DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1)); - - /* build tree */ - nonNullRank = HUF_buildTree(huffNode, maxSymbolValue); - -- /* enforce maxTableLog */ -+ /* determine and enforce maxTableLog */ - maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); - if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ - -@@ -716,13 +800,20 @@ size_t HUF_estimateCompressedSize(const - } - - int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { -- HUF_CElt const* ct = CTable + 1; -- int bad = 0; -- int s; -- for (s = 0; s <= (int)maxSymbolValue; ++s) { -- bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0); -- } -- return !bad; -+ HUF_CTableHeader header = HUF_readCTableHeader(CTable); -+ HUF_CElt const* ct = CTable + 1; -+ int bad = 0; -+ int s; -+ -+ assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX); -+ -+ if (header.maxSymbolValue < maxSymbolValue) -+ return 0; -+ -+ for (s = 0; s <= (int)maxSymbolValue; ++s) { -+ bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0); -+ } -+ return !bad; - } - - size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } -@@ -804,7 +895,7 @@ FORCE_INLINE_TEMPLATE void HUF_addBits(H - #if DEBUGLEVEL >= 1 - { - size_t const nbBits = HUF_getNbBits(elt); -- size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1; -+ size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1; - (void)dirtyBits; - /* Middle bits are 0. 
*/ - assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0); -@@ -884,7 +975,7 @@ static size_t HUF_closeCStream(HUF_CStre - { - size_t const nbBits = bitC->bitPos[0] & 0xFF; - if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ -- return (bitC->ptr - bitC->startPtr) + (nbBits > 0); -+ return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0); - } - } - -@@ -964,17 +1055,17 @@ HUF_compress1X_usingCTable_internal_body - const void* src, size_t srcSize, - const HUF_CElt* CTable) - { -- U32 const tableLog = (U32)CTable[0]; -+ U32 const tableLog = HUF_readCTableHeader(CTable).tableLog; - HUF_CElt const* ct = CTable + 1; - const BYTE* ip = (const BYTE*) src; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstSize; -- BYTE* op = ostart; - HUF_CStream_t bitC; - - /* init */ - if (dstSize < 8) return 0; /* not enough space to compress */ -- { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op)); -+ { BYTE* op = ostart; -+ size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op)); - if (HUF_isError(initErr)) return 0; } - - if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11) -@@ -1045,9 +1136,9 @@ HUF_compress1X_usingCTable_internal_defa - static size_t - HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, - const void* src, size_t srcSize, -- const HUF_CElt* CTable, const int bmi2) -+ const HUF_CElt* CTable, const int flags) - { -- if (bmi2) { -+ if (flags & HUF_flags_bmi2) { - return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); - } - return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); -@@ -1058,28 +1149,23 @@ HUF_compress1X_usingCTable_internal(void - static size_t - HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, - const void* src, size_t srcSize, -- const HUF_CElt* CTable, const int bmi2) -+ const HUF_CElt* CTable, const int flags) - { -- (void)bmi2; -+ (void)flags; - return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); - } - - #endif - --size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) --{ -- return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); --} -- --size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2) -+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags) - { -- return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2); -+ return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags); - } - - static size_t - HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, - const void* src, size_t srcSize, -- const HUF_CElt* CTable, int bmi2) -+ const HUF_CElt* CTable, int flags) - { - size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ - const BYTE* ip = (const BYTE*) src; -@@ -1093,7 +1179,7 @@ HUF_compress4X_usingCTable_internal(void - op += 6; /* jumpTable */ - - assert(op <= oend); -- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); -+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) ); - if (cSize == 0 || cSize > 65535) return 0; - MEM_writeLE16(ostart, (U16)cSize); - op += cSize; -@@ -1101,7 +1187,7 @@ 
HUF_compress4X_usingCTable_internal(void - - ip += segmentSize; - assert(op <= oend); -- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); -+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) ); - if (cSize == 0 || cSize > 65535) return 0; - MEM_writeLE16(ostart+2, (U16)cSize); - op += cSize; -@@ -1109,7 +1195,7 @@ HUF_compress4X_usingCTable_internal(void - - ip += segmentSize; - assert(op <= oend); -- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); -+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) ); - if (cSize == 0 || cSize > 65535) return 0; - MEM_writeLE16(ostart+4, (U16)cSize); - op += cSize; -@@ -1118,7 +1204,7 @@ HUF_compress4X_usingCTable_internal(void - ip += segmentSize; - assert(op <= oend); - assert(ip <= iend); -- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); -+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) ); - if (cSize == 0 || cSize > 65535) return 0; - op += cSize; - } -@@ -1126,14 +1212,9 @@ HUF_compress4X_usingCTable_internal(void - return (size_t)(op-ostart); - } - --size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) --{ -- return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); --} -- --size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2) -+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags) - { -- return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2); -+ return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags); - } - - typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; -@@ -1141,11 +1222,11 @@ typedef enum { HUF_singleStream, HUF_fou - static size_t HUF_compressCTable_internal( - BYTE* const ostart, BYTE* op, BYTE* const oend, - const void* src, size_t srcSize, -- HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) -+ HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags) - { - size_t const cSize = (nbStreams==HUF_singleStream) ? 
-- HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : -- HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); -+ HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) : -+ HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags); - if (HUF_isError(cSize)) { return cSize; } - if (cSize==0) { return 0; } /* uncompressible */ - op += cSize; -@@ -1168,6 +1249,81 @@ typedef struct { - #define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096 - #define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */ - -+unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue) -+{ -+ unsigned cardinality = 0; -+ unsigned i; -+ -+ for (i = 0; i < maxSymbolValue + 1; i++) { -+ if (count[i] != 0) cardinality += 1; -+ } -+ -+ return cardinality; -+} -+ -+unsigned HUF_minTableLog(unsigned symbolCardinality) -+{ -+ U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1; -+ return minBitsSymbols; -+} -+ -+unsigned HUF_optimalTableLog( -+ unsigned maxTableLog, -+ size_t srcSize, -+ unsigned maxSymbolValue, -+ void* workSpace, size_t wkspSize, -+ HUF_CElt* table, -+ const unsigned* count, -+ int flags) -+{ -+ assert(srcSize > 1); /* Not supported, RLE should be used instead */ -+ assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables)); -+ -+ if (!(flags & HUF_flags_optimalDepth)) { -+ /* cheap evaluation, based on FSE */ -+ return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); -+ } -+ -+ { BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp); -+ size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp); -+ size_t hSize, newSize; -+ const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue); -+ const unsigned minTableLog = HUF_minTableLog(symbolCardinality); -+ size_t optSize = ((size_t) ~0) - 1; -+ unsigned optLog = maxTableLog, optLogGuess; -+ -+ DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize); -+ -+ /* Search until size increases */ -+ for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) { -+ DEBUGLOG(7, "checking for huffLog=%u", optLogGuess); -+ -+ { size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize); -+ if (ERR_isError(maxBits)) continue; -+ -+ if (maxBits < optLogGuess && optLogGuess > minTableLog) break; -+ -+ hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize); -+ } -+ -+ if (ERR_isError(hSize)) continue; -+ -+ newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize; -+ -+ if (newSize > optSize + 1) { -+ break; -+ } -+ -+ if (newSize < optSize) { -+ optSize = newSize; -+ optLog = optLogGuess; -+ } -+ } -+ assert(optLog <= HUF_TABLELOG_MAX); -+ return optLog; -+ } -+} -+ - /* HUF_compress_internal() : - * `workSpace_align4` must be aligned on 4-bytes boundaries, - * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */ -@@ -1177,14 +1333,14 @@ HUF_compress_internal (void* dst, size_t - unsigned maxSymbolValue, unsigned huffLog, - HUF_nbStreams_e nbStreams, - void* workSpace, size_t wkspSize, -- HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, -- const int bmi2, unsigned suspectUncompressible) -+ HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags) - { - HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t)); - BYTE* const ostart = 
(BYTE*)dst; - BYTE* const oend = ostart + dstSize; - BYTE* op = ostart; - -+ DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize); - HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE); - - /* checks & inits */ -@@ -1198,16 +1354,17 @@ HUF_compress_internal (void* dst, size_t - if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; - - /* Heuristic : If old table is valid, use it for small inputs */ -- if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { -+ if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) { - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, -- nbStreams, oldHufTable, bmi2); -+ nbStreams, oldHufTable, flags); - } - - /* If uncompressible data is suspected, do a smaller sampling first */ - DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2); -- if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) { -+ if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) { - size_t largestTotal = 0; -+ DEBUGLOG(5, "input suspected incompressible : sampling to check"); - { unsigned maxSymbolValueBegin = maxSymbolValue; - CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) ); - largestTotal += largestBegin; -@@ -1224,6 +1381,7 @@ HUF_compress_internal (void* dst, size_t - if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ - if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ - } -+ DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1)); - - /* Check validity of previous table */ - if ( repeat -@@ -1232,25 +1390,20 @@ HUF_compress_internal (void* dst, size_t - *repeat = HUF_repeat_none; - } - /* Heuristic : use existing table for small inputs */ -- if (preferRepeat && repeat && *repeat != HUF_repeat_none) { -+ if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) { - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, -- nbStreams, oldHufTable, bmi2); -+ nbStreams, oldHufTable, flags); - } - - /* Build Huffman Tree */ -- huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); -+ huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags); - { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, - maxSymbolValue, huffLog, - &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp)); - CHECK_F(maxBits); - huffLog = (U32)maxBits; -- } -- /* Zero unused symbols in CTable, so we can check it for validity */ -- { -- size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue); -- size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt); -- ZSTD_memset(table->CTable + ctableSize, 0, unusedSize); -+ DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1)); - } - - /* Write table description header */ -@@ -1263,7 +1416,7 @@ HUF_compress_internal (void* dst, size_t - if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, -- nbStreams, oldHufTable, bmi2); -+ nbStreams, oldHufTable, flags); - } } - - /* Use the new huffman table */ -@@ -1275,61 +1428,35 @@ 
HUF_compress_internal (void* dst, size_t - } - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, -- nbStreams, table->CTable, bmi2); --} -- -- --size_t HUF_compress1X_wksp (void* dst, size_t dstSize, -- const void* src, size_t srcSize, -- unsigned maxSymbolValue, unsigned huffLog, -- void* workSpace, size_t wkspSize) --{ -- return HUF_compress_internal(dst, dstSize, src, srcSize, -- maxSymbolValue, huffLog, HUF_singleStream, -- workSpace, wkspSize, -- NULL, NULL, 0, 0 /*bmi2*/, 0); -+ nbStreams, table->CTable, flags); - } - - size_t HUF_compress1X_repeat (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - void* workSpace, size_t wkspSize, -- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, -- int bmi2, unsigned suspectUncompressible) -+ HUF_CElt* hufTable, HUF_repeat* repeat, int flags) - { -+ DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize); - return HUF_compress_internal(dst, dstSize, src, srcSize, - maxSymbolValue, huffLog, HUF_singleStream, - workSpace, wkspSize, hufTable, -- repeat, preferRepeat, bmi2, suspectUncompressible); --} -- --/* HUF_compress4X_repeat(): -- * compress input using 4 streams. -- * provide workspace to generate compression tables */ --size_t HUF_compress4X_wksp (void* dst, size_t dstSize, -- const void* src, size_t srcSize, -- unsigned maxSymbolValue, unsigned huffLog, -- void* workSpace, size_t wkspSize) --{ -- return HUF_compress_internal(dst, dstSize, src, srcSize, -- maxSymbolValue, huffLog, HUF_fourStreams, -- workSpace, wkspSize, -- NULL, NULL, 0, 0 /*bmi2*/, 0); -+ repeat, flags); - } - - /* HUF_compress4X_repeat(): - * compress input using 4 streams. - * consider skipping quickly -- * re-use an existing huffman compression table */ -+ * reuse an existing huffman compression table */ - size_t HUF_compress4X_repeat (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - void* workSpace, size_t wkspSize, -- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible) -+ HUF_CElt* hufTable, HUF_repeat* repeat, int flags) - { -+ DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize); - return HUF_compress_internal(dst, dstSize, src, srcSize, - maxSymbolValue, huffLog, HUF_fourStreams, - workSpace, wkspSize, -- hufTable, repeat, preferRepeat, bmi2, suspectUncompressible); -+ hufTable, repeat, flags); - } -- ---- a/lib/zstd/compress/zstd_compress.c -+++ b/lib/zstd/compress/zstd_compress.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -11,12 +12,13 @@ - /*-************************************* - * Dependencies - ***************************************/ -+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */ - #include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */ - #include "../common/mem.h" -+#include "../common/error_private.h" - #include "hist.h" /* HIST_countFast_wksp */ - #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ - #include "../common/fse.h" --#define HUF_STATIC_LINKING_ONLY - #include "../common/huf.h" - #include "zstd_compress_internal.h" - #include "zstd_compress_sequences.h" -@@ -27,6 +29,7 @@ - #include "zstd_opt.h" - #include "zstd_ldm.h" - #include "zstd_compress_superblock.h" -+#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_rotateRight_U64 */ - - /* *************************************************************** - * Tuning parameters -@@ -44,7 +47,7 @@ - * in log format, aka 17 => 1 << 17 == 128Ki positions. - * This structure is only used in zstd_opt. - * Since allocation is centralized for all strategies, it has to be known here. -- * The actual (selected) size of the hash table is then stored in ZSTD_matchState_t.hashLog3, -+ * The actual (selected) size of the hash table is then stored in ZSTD_MatchState_t.hashLog3, - * so that zstd_opt.c doesn't need to know about this constant. - */ - #ifndef ZSTD_HASHLOG3_MAX -@@ -55,14 +58,17 @@ - * Helper functions - ***************************************/ - /* ZSTD_compressBound() -- * Note that the result from this function is only compatible with the "normal" -- * full-block strategy. -- * When there are a lot of small blocks due to frequent flush in streaming mode -- * the overhead of headers can make the compressed data to be larger than the -- * return value of ZSTD_compressBound(). -+ * Note that the result from this function is only valid for -+ * the one-pass compression functions. -+ * When employing the streaming mode, -+ * if flushes are frequently altering the size of blocks, -+ * the overhead from block headers can make the compressed data larger -+ * than the return value of ZSTD_compressBound(). - */ - size_t ZSTD_compressBound(size_t srcSize) { -- return ZSTD_COMPRESSBOUND(srcSize); -+ size_t const r = ZSTD_COMPRESSBOUND(srcSize); -+ if (r==0) return ERROR(srcSize_wrong); -+ return r; - } - - -@@ -75,12 +81,12 @@ struct ZSTD_CDict_s { - ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */ - U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ - ZSTD_cwksp workspace; -- ZSTD_matchState_t matchState; -+ ZSTD_MatchState_t matchState; - ZSTD_compressedBlockState_t cBlockState; - ZSTD_customMem customMem; - U32 dictID; - int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ -- ZSTD_paramSwitch_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use -+ ZSTD_ParamSwitch_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use - * row-based matchfinder. Unless the cdict is reloaded, we will use - * the same greedy/lazy matchfinder at compression time. - */ -@@ -130,11 +136,12 @@ ZSTD_CCtx* ZSTD_initStaticCCtx(void* wor - ZSTD_cwksp_move(&cctx->workspace, &ws); - cctx->staticSize = workspaceSize; - -- /* statically sized space. 
entropyWorkspace never moves (but prev/next block swap places) */ -- if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; -+ /* statically sized space. tmpWorkspace never moves (but prev/next block swap places) */ -+ if (!ZSTD_cwksp_check_available(&cctx->workspace, TMP_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; - cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); - cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); -- cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE); -+ cctx->tmpWorkspace = ZSTD_cwksp_reserve_object(&cctx->workspace, TMP_WORKSPACE_SIZE); -+ cctx->tmpWkspSize = TMP_WORKSPACE_SIZE; - cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); - return cctx; - } -@@ -168,15 +175,13 @@ static void ZSTD_freeCCtxContent(ZSTD_CC - - size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) - { -+ DEBUGLOG(3, "ZSTD_freeCCtx (address: %p)", (void*)cctx); - if (cctx==NULL) return 0; /* support free on NULL */ - RETURN_ERROR_IF(cctx->staticSize, memory_allocation, - "not compatible with static CCtx"); -- { -- int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); -+ { int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); - ZSTD_freeCCtxContent(cctx); -- if (!cctxInWorkspace) { -- ZSTD_customFree(cctx, cctx->customMem); -- } -+ if (!cctxInWorkspace) ZSTD_customFree(cctx, cctx->customMem); - } - return 0; - } -@@ -205,7 +210,7 @@ size_t ZSTD_sizeof_CStream(const ZSTD_CS - } - - /* private API call, for dictBuilder only */ --const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } -+const SeqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } - - /* Returns true if the strategy supports using a row based matchfinder */ - static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) { -@@ -215,32 +220,23 @@ static int ZSTD_rowMatchFinderSupported( - /* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder - * for this compression. 
- */ --static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_paramSwitch_e mode) { -+static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_ParamSwitch_e mode) { - assert(mode != ZSTD_ps_auto); - return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_ps_enable); - } - - /* Returns row matchfinder usage given an initial mode and cParams */ --static ZSTD_paramSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_paramSwitch_e mode, -+static ZSTD_ParamSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_ParamSwitch_e mode, - const ZSTD_compressionParameters* const cParams) { --#if defined(ZSTD_ARCH_X86_SSE2) || defined(ZSTD_ARCH_ARM_NEON) -- int const kHasSIMD128 = 1; --#else -- int const kHasSIMD128 = 0; --#endif - if (mode != ZSTD_ps_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */ - mode = ZSTD_ps_disable; - if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode; -- if (kHasSIMD128) { -- if (cParams->windowLog > 14) mode = ZSTD_ps_enable; -- } else { -- if (cParams->windowLog > 17) mode = ZSTD_ps_enable; -- } -+ if (cParams->windowLog > 14) mode = ZSTD_ps_enable; - return mode; - } - - /* Returns block splitter usage (generally speaking, when using slower/stronger compression modes) */ --static ZSTD_paramSwitch_e ZSTD_resolveBlockSplitterMode(ZSTD_paramSwitch_e mode, -+static ZSTD_ParamSwitch_e ZSTD_resolveBlockSplitterMode(ZSTD_ParamSwitch_e mode, - const ZSTD_compressionParameters* const cParams) { - if (mode != ZSTD_ps_auto) return mode; - return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17) ? ZSTD_ps_enable : ZSTD_ps_disable; -@@ -248,7 +244,7 @@ static ZSTD_paramSwitch_e ZSTD_resolveBl - - /* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */ - static int ZSTD_allocateChainTable(const ZSTD_strategy strategy, -- const ZSTD_paramSwitch_e useRowMatchFinder, -+ const ZSTD_ParamSwitch_e useRowMatchFinder, - const U32 forDDSDict) { - assert(useRowMatchFinder != ZSTD_ps_auto); - /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate. -@@ -257,16 +253,44 @@ static int ZSTD_allocateChainTable(const - return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder)); - } - --/* Returns 1 if compression parameters are such that we should -+/* Returns ZSTD_ps_enable if compression parameters are such that we should - * enable long distance matching (wlog >= 27, strategy >= btopt). -- * Returns 0 otherwise. -+ * Returns ZSTD_ps_disable otherwise. - */ --static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode, -+static ZSTD_ParamSwitch_e ZSTD_resolveEnableLdm(ZSTD_ParamSwitch_e mode, - const ZSTD_compressionParameters* const cParams) { - if (mode != ZSTD_ps_auto) return mode; - return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable; - } - -+static int ZSTD_resolveExternalSequenceValidation(int mode) { -+ return mode; -+} -+ -+/* Resolves maxBlockSize to the default if no value is present. 
*/ -+static size_t ZSTD_resolveMaxBlockSize(size_t maxBlockSize) { -+ if (maxBlockSize == 0) { -+ return ZSTD_BLOCKSIZE_MAX; -+ } else { -+ return maxBlockSize; -+ } -+} -+ -+static ZSTD_ParamSwitch_e ZSTD_resolveExternalRepcodeSearch(ZSTD_ParamSwitch_e value, int cLevel) { -+ if (value != ZSTD_ps_auto) return value; -+ if (cLevel < 10) { -+ return ZSTD_ps_disable; -+ } else { -+ return ZSTD_ps_enable; -+ } -+} -+ -+/* Returns 1 if compression parameters are such that CDict hashtable and chaintable indices are tagged. -+ * If so, the tags need to be removed in ZSTD_resetCCtx_byCopyingCDict. */ -+static int ZSTD_CDictIndicesAreTagged(const ZSTD_compressionParameters* const cParams) { -+ return cParams->strategy == ZSTD_fast || cParams->strategy == ZSTD_dfast; -+} -+ - static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( - ZSTD_compressionParameters cParams) - { -@@ -282,8 +306,12 @@ static ZSTD_CCtx_params ZSTD_makeCCtxPar - assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog); - assert(cctxParams.ldmParams.hashRateLog < 32); - } -- cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams); -+ cctxParams.postBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.postBlockSplitter, &cParams); - cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); -+ cctxParams.validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams.validateSequences); -+ cctxParams.maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams.maxBlockSize); -+ cctxParams.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams.searchForExternalRepcodes, -+ cctxParams.compressionLevel); - assert(!ZSTD_checkCParams(cParams)); - return cctxParams; - } -@@ -329,10 +357,13 @@ size_t ZSTD_CCtxParams_init(ZSTD_CCtx_pa - #define ZSTD_NO_CLEVEL 0 - - /* -- * Initializes the cctxParams from params and compressionLevel. -+ * Initializes `cctxParams` from `params` and `compressionLevel`. - * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL. 
- */ --static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel) -+static void -+ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, -+ const ZSTD_parameters* params, -+ int compressionLevel) - { - assert(!ZSTD_checkCParams(params->cParams)); - ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); -@@ -343,10 +374,13 @@ static void ZSTD_CCtxParams_init_interna - */ - cctxParams->compressionLevel = compressionLevel; - cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams); -- cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, &params->cParams); -+ cctxParams->postBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->postBlockSplitter, &params->cParams); - cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams); -+ cctxParams->validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams->validateSequences); -+ cctxParams->maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams->maxBlockSize); -+ cctxParams->searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams->searchForExternalRepcodes, compressionLevel); - DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d", -- cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm); -+ cctxParams->useRowMatchFinder, cctxParams->postBlockSplitter, cctxParams->ldmParams.enableLdm); - } - - size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) -@@ -359,7 +393,7 @@ size_t ZSTD_CCtxParams_init_advanced(ZST - - /* - * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone. -- * @param param Validated zstd parameters. -+ * @param params Validated zstd parameters.
- */ - static void ZSTD_CCtxParams_setZstdParams( - ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) -@@ -455,8 +489,8 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_c - return bounds; - - case ZSTD_c_enableLongDistanceMatching: -- bounds.lowerBound = 0; -- bounds.upperBound = 1; -+ bounds.lowerBound = (int)ZSTD_ps_auto; -+ bounds.upperBound = (int)ZSTD_ps_disable; - return bounds; - - case ZSTD_c_ldmHashLog: -@@ -534,11 +568,16 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_c - bounds.upperBound = 1; - return bounds; - -- case ZSTD_c_useBlockSplitter: -+ case ZSTD_c_splitAfterSequences: - bounds.lowerBound = (int)ZSTD_ps_auto; - bounds.upperBound = (int)ZSTD_ps_disable; - return bounds; - -+ case ZSTD_c_blockSplitterLevel: -+ bounds.lowerBound = 0; -+ bounds.upperBound = ZSTD_BLOCKSPLITTER_LEVEL_MAX; -+ return bounds; -+ - case ZSTD_c_useRowMatchFinder: - bounds.lowerBound = (int)ZSTD_ps_auto; - bounds.upperBound = (int)ZSTD_ps_disable; -@@ -549,6 +588,26 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_c - bounds.upperBound = 1; - return bounds; - -+ case ZSTD_c_prefetchCDictTables: -+ bounds.lowerBound = (int)ZSTD_ps_auto; -+ bounds.upperBound = (int)ZSTD_ps_disable; -+ return bounds; -+ -+ case ZSTD_c_enableSeqProducerFallback: -+ bounds.lowerBound = 0; -+ bounds.upperBound = 1; -+ return bounds; -+ -+ case ZSTD_c_maxBlockSize: -+ bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN; -+ bounds.upperBound = ZSTD_BLOCKSIZE_MAX; -+ return bounds; -+ -+ case ZSTD_c_repcodeResolution: -+ bounds.lowerBound = (int)ZSTD_ps_auto; -+ bounds.upperBound = (int)ZSTD_ps_disable; -+ return bounds; -+ - default: - bounds.error = ERROR(parameter_unsupported); - return bounds; -@@ -567,10 +626,11 @@ static size_t ZSTD_cParam_clampBounds(ZS - return 0; - } - --#define BOUNDCHECK(cParam, val) { \ -- RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ -- parameter_outOfBound, "Param out of bounds"); \ --} -+#define BOUNDCHECK(cParam, val) \ -+ do { \ -+ RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ -+ parameter_outOfBound, "Param out of bounds"); \ -+ } while (0) - - - static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) -@@ -584,6 +644,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_ - case ZSTD_c_minMatch: - case ZSTD_c_targetLength: - case ZSTD_c_strategy: -+ case ZSTD_c_blockSplitterLevel: - return 1; - - case ZSTD_c_format: -@@ -610,9 +671,13 @@ static int ZSTD_isUpdateAuthorized(ZSTD_ - case ZSTD_c_stableOutBuffer: - case ZSTD_c_blockDelimiters: - case ZSTD_c_validateSequences: -- case ZSTD_c_useBlockSplitter: -+ case ZSTD_c_splitAfterSequences: - case ZSTD_c_useRowMatchFinder: - case ZSTD_c_deterministicRefPrefix: -+ case ZSTD_c_prefetchCDictTables: -+ case ZSTD_c_enableSeqProducerFallback: -+ case ZSTD_c_maxBlockSize: -+ case ZSTD_c_repcodeResolution: - default: - return 0; - } -@@ -625,7 +690,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* - if (ZSTD_isUpdateAuthorized(param)) { - cctx->cParamsChanged = 1; - } else { -- RETURN_ERROR(stage_wrong, "can only set params in ctx init stage"); -+ RETURN_ERROR(stage_wrong, "can only set params in cctx init stage"); - } } - - switch(param) -@@ -665,9 +730,14 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* - case ZSTD_c_stableOutBuffer: - case ZSTD_c_blockDelimiters: - case ZSTD_c_validateSequences: -- case ZSTD_c_useBlockSplitter: -+ case ZSTD_c_splitAfterSequences: -+ case ZSTD_c_blockSplitterLevel: - case ZSTD_c_useRowMatchFinder: - case ZSTD_c_deterministicRefPrefix: -+ case ZSTD_c_prefetchCDictTables: -+ case ZSTD_c_enableSeqProducerFallback: -+ case 
ZSTD_c_maxBlockSize: -+ case ZSTD_c_repcodeResolution: - break; - - default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); -@@ -723,12 +793,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD - case ZSTD_c_minMatch : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_minMatch, value); -- CCtxParams->cParams.minMatch = value; -+ CCtxParams->cParams.minMatch = (U32)value; - return CCtxParams->cParams.minMatch; - - case ZSTD_c_targetLength : - BOUNDCHECK(ZSTD_c_targetLength, value); -- CCtxParams->cParams.targetLength = value; -+ CCtxParams->cParams.targetLength = (U32)value; - return CCtxParams->cParams.targetLength; - - case ZSTD_c_strategy : -@@ -741,12 +811,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD - /* Content size written in frame header _when known_ (default:1) */ - DEBUGLOG(4, "set content size flag = %u", (value!=0)); - CCtxParams->fParams.contentSizeFlag = value != 0; -- return CCtxParams->fParams.contentSizeFlag; -+ return (size_t)CCtxParams->fParams.contentSizeFlag; - - case ZSTD_c_checksumFlag : - /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ - CCtxParams->fParams.checksumFlag = value != 0; -- return CCtxParams->fParams.checksumFlag; -+ return (size_t)CCtxParams->fParams.checksumFlag; - - case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ - DEBUGLOG(4, "set dictIDFlag = %u", (value!=0)); -@@ -755,18 +825,18 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD - - case ZSTD_c_forceMaxWindow : - CCtxParams->forceWindow = (value != 0); -- return CCtxParams->forceWindow; -+ return (size_t)CCtxParams->forceWindow; - - case ZSTD_c_forceAttachDict : { - const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; -- BOUNDCHECK(ZSTD_c_forceAttachDict, pref); -+ BOUNDCHECK(ZSTD_c_forceAttachDict, (int)pref); - CCtxParams->attachDictPref = pref; - return CCtxParams->attachDictPref; - } - - case ZSTD_c_literalCompressionMode : { -- const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value; -- BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); -+ const ZSTD_ParamSwitch_e lcm = (ZSTD_ParamSwitch_e)value; -+ BOUNDCHECK(ZSTD_c_literalCompressionMode, (int)lcm); - CCtxParams->literalCompressionMode = lcm; - return CCtxParams->literalCompressionMode; - } -@@ -789,47 +859,50 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD - - case ZSTD_c_enableDedicatedDictSearch : - CCtxParams->enableDedicatedDictSearch = (value!=0); -- return CCtxParams->enableDedicatedDictSearch; -+ return (size_t)CCtxParams->enableDedicatedDictSearch; - - case ZSTD_c_enableLongDistanceMatching : -- CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value; -+ BOUNDCHECK(ZSTD_c_enableLongDistanceMatching, value); -+ CCtxParams->ldmParams.enableLdm = (ZSTD_ParamSwitch_e)value; - return CCtxParams->ldmParams.enableLdm; - - case ZSTD_c_ldmHashLog : - if (value!=0) /* 0 ==> auto */ - BOUNDCHECK(ZSTD_c_ldmHashLog, value); -- CCtxParams->ldmParams.hashLog = value; -+ CCtxParams->ldmParams.hashLog = (U32)value; - return CCtxParams->ldmParams.hashLog; - - case ZSTD_c_ldmMinMatch : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_ldmMinMatch, value); -- CCtxParams->ldmParams.minMatchLength = value; -+ CCtxParams->ldmParams.minMatchLength = (U32)value; - return CCtxParams->ldmParams.minMatchLength; - - case ZSTD_c_ldmBucketSizeLog : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value); -- CCtxParams->ldmParams.bucketSizeLog = value; -+ CCtxParams->ldmParams.bucketSizeLog = (U32)value; - return 
CCtxParams->ldmParams.bucketSizeLog; - - case ZSTD_c_ldmHashRateLog : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_ldmHashRateLog, value); -- CCtxParams->ldmParams.hashRateLog = value; -+ CCtxParams->ldmParams.hashRateLog = (U32)value; - return CCtxParams->ldmParams.hashRateLog; - - case ZSTD_c_targetCBlockSize : -- if (value!=0) /* 0 ==> default */ -+ if (value!=0) { /* 0 ==> default */ -+ value = MAX(value, ZSTD_TARGETCBLOCKSIZE_MIN); - BOUNDCHECK(ZSTD_c_targetCBlockSize, value); -- CCtxParams->targetCBlockSize = value; -+ } -+ CCtxParams->targetCBlockSize = (U32)value; - return CCtxParams->targetCBlockSize; - - case ZSTD_c_srcSizeHint : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_srcSizeHint, value); - CCtxParams->srcSizeHint = value; -- return CCtxParams->srcSizeHint; -+ return (size_t)CCtxParams->srcSizeHint; - - case ZSTD_c_stableInBuffer: - BOUNDCHECK(ZSTD_c_stableInBuffer, value); -@@ -843,28 +916,55 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD - - case ZSTD_c_blockDelimiters: - BOUNDCHECK(ZSTD_c_blockDelimiters, value); -- CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value; -+ CCtxParams->blockDelimiters = (ZSTD_SequenceFormat_e)value; - return CCtxParams->blockDelimiters; - - case ZSTD_c_validateSequences: - BOUNDCHECK(ZSTD_c_validateSequences, value); - CCtxParams->validateSequences = value; -- return CCtxParams->validateSequences; -+ return (size_t)CCtxParams->validateSequences; - -- case ZSTD_c_useBlockSplitter: -- BOUNDCHECK(ZSTD_c_useBlockSplitter, value); -- CCtxParams->useBlockSplitter = (ZSTD_paramSwitch_e)value; -- return CCtxParams->useBlockSplitter; -+ case ZSTD_c_splitAfterSequences: -+ BOUNDCHECK(ZSTD_c_splitAfterSequences, value); -+ CCtxParams->postBlockSplitter = (ZSTD_ParamSwitch_e)value; -+ return CCtxParams->postBlockSplitter; -+ -+ case ZSTD_c_blockSplitterLevel: -+ BOUNDCHECK(ZSTD_c_blockSplitterLevel, value); -+ CCtxParams->preBlockSplitter_level = value; -+ return (size_t)CCtxParams->preBlockSplitter_level; - - case ZSTD_c_useRowMatchFinder: - BOUNDCHECK(ZSTD_c_useRowMatchFinder, value); -- CCtxParams->useRowMatchFinder = (ZSTD_paramSwitch_e)value; -+ CCtxParams->useRowMatchFinder = (ZSTD_ParamSwitch_e)value; - return CCtxParams->useRowMatchFinder; - - case ZSTD_c_deterministicRefPrefix: - BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value); - CCtxParams->deterministicRefPrefix = !!value; -- return CCtxParams->deterministicRefPrefix; -+ return (size_t)CCtxParams->deterministicRefPrefix; -+ -+ case ZSTD_c_prefetchCDictTables: -+ BOUNDCHECK(ZSTD_c_prefetchCDictTables, value); -+ CCtxParams->prefetchCDictTables = (ZSTD_ParamSwitch_e)value; -+ return CCtxParams->prefetchCDictTables; -+ -+ case ZSTD_c_enableSeqProducerFallback: -+ BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value); -+ CCtxParams->enableMatchFinderFallback = value; -+ return (size_t)CCtxParams->enableMatchFinderFallback; -+ -+ case ZSTD_c_maxBlockSize: -+ if (value!=0) /* 0 ==> default */ -+ BOUNDCHECK(ZSTD_c_maxBlockSize, value); -+ assert(value>=0); -+ CCtxParams->maxBlockSize = (size_t)value; -+ return CCtxParams->maxBlockSize; -+ -+ case ZSTD_c_repcodeResolution: -+ BOUNDCHECK(ZSTD_c_repcodeResolution, value); -+ CCtxParams->searchForExternalRepcodes = (ZSTD_ParamSwitch_e)value; -+ return CCtxParams->searchForExternalRepcodes; - - default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); - } -@@ -881,7 +981,7 @@ size_t ZSTD_CCtxParams_getParameter( - switch(param) - { - case ZSTD_c_format : -- *value = CCtxParams->format; -+ *value = 
(int)CCtxParams->format; - break; - case ZSTD_c_compressionLevel : - *value = CCtxParams->compressionLevel; -@@ -896,16 +996,16 @@ size_t ZSTD_CCtxParams_getParameter( - *value = (int)CCtxParams->cParams.chainLog; - break; - case ZSTD_c_searchLog : -- *value = CCtxParams->cParams.searchLog; -+ *value = (int)CCtxParams->cParams.searchLog; - break; - case ZSTD_c_minMatch : -- *value = CCtxParams->cParams.minMatch; -+ *value = (int)CCtxParams->cParams.minMatch; - break; - case ZSTD_c_targetLength : -- *value = CCtxParams->cParams.targetLength; -+ *value = (int)CCtxParams->cParams.targetLength; - break; - case ZSTD_c_strategy : -- *value = (unsigned)CCtxParams->cParams.strategy; -+ *value = (int)CCtxParams->cParams.strategy; - break; - case ZSTD_c_contentSizeFlag : - *value = CCtxParams->fParams.contentSizeFlag; -@@ -920,10 +1020,10 @@ size_t ZSTD_CCtxParams_getParameter( - *value = CCtxParams->forceWindow; - break; - case ZSTD_c_forceAttachDict : -- *value = CCtxParams->attachDictPref; -+ *value = (int)CCtxParams->attachDictPref; - break; - case ZSTD_c_literalCompressionMode : -- *value = CCtxParams->literalCompressionMode; -+ *value = (int)CCtxParams->literalCompressionMode; - break; - case ZSTD_c_nbWorkers : - assert(CCtxParams->nbWorkers == 0); -@@ -939,19 +1039,19 @@ size_t ZSTD_CCtxParams_getParameter( - *value = CCtxParams->enableDedicatedDictSearch; - break; - case ZSTD_c_enableLongDistanceMatching : -- *value = CCtxParams->ldmParams.enableLdm; -+ *value = (int)CCtxParams->ldmParams.enableLdm; - break; - case ZSTD_c_ldmHashLog : -- *value = CCtxParams->ldmParams.hashLog; -+ *value = (int)CCtxParams->ldmParams.hashLog; - break; - case ZSTD_c_ldmMinMatch : -- *value = CCtxParams->ldmParams.minMatchLength; -+ *value = (int)CCtxParams->ldmParams.minMatchLength; - break; - case ZSTD_c_ldmBucketSizeLog : -- *value = CCtxParams->ldmParams.bucketSizeLog; -+ *value = (int)CCtxParams->ldmParams.bucketSizeLog; - break; - case ZSTD_c_ldmHashRateLog : -- *value = CCtxParams->ldmParams.hashRateLog; -+ *value = (int)CCtxParams->ldmParams.hashRateLog; - break; - case ZSTD_c_targetCBlockSize : - *value = (int)CCtxParams->targetCBlockSize; -@@ -971,8 +1071,11 @@ size_t ZSTD_CCtxParams_getParameter( - case ZSTD_c_validateSequences : - *value = (int)CCtxParams->validateSequences; - break; -- case ZSTD_c_useBlockSplitter : -- *value = (int)CCtxParams->useBlockSplitter; -+ case ZSTD_c_splitAfterSequences : -+ *value = (int)CCtxParams->postBlockSplitter; -+ break; -+ case ZSTD_c_blockSplitterLevel : -+ *value = CCtxParams->preBlockSplitter_level; - break; - case ZSTD_c_useRowMatchFinder : - *value = (int)CCtxParams->useRowMatchFinder; -@@ -980,6 +1083,18 @@ size_t ZSTD_CCtxParams_getParameter( - case ZSTD_c_deterministicRefPrefix: - *value = (int)CCtxParams->deterministicRefPrefix; - break; -+ case ZSTD_c_prefetchCDictTables: -+ *value = (int)CCtxParams->prefetchCDictTables; -+ break; -+ case ZSTD_c_enableSeqProducerFallback: -+ *value = CCtxParams->enableMatchFinderFallback; -+ break; -+ case ZSTD_c_maxBlockSize: -+ *value = (int)CCtxParams->maxBlockSize; -+ break; -+ case ZSTD_c_repcodeResolution: -+ *value = (int)CCtxParams->searchForExternalRepcodes; -+ break; - default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); - } - return 0; -@@ -1006,9 +1121,47 @@ size_t ZSTD_CCtx_setParametersUsingCCtxP - return 0; - } - -+size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams) -+{ -+ ZSTD_STATIC_ASSERT(sizeof(cparams) == 7 * 4 /* all params are listed below */); -+ 
DEBUGLOG(4, "ZSTD_CCtx_setCParams"); -+ /* only update if all parameters are valid */ -+ FORWARD_IF_ERROR(ZSTD_checkCParams(cparams), ""); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, (int)cparams.windowLog), ""); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_chainLog, (int)cparams.chainLog), ""); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, (int)cparams.hashLog), ""); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_searchLog, (int)cparams.searchLog), ""); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, (int)cparams.minMatch), ""); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetLength, (int)cparams.targetLength), ""); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, (int)cparams.strategy), ""); -+ return 0; -+} -+ -+size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams) -+{ -+ ZSTD_STATIC_ASSERT(sizeof(fparams) == 3 * 4 /* all params are listed below */); -+ DEBUGLOG(4, "ZSTD_CCtx_setFParams"); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, fparams.contentSizeFlag != 0), ""); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, fparams.checksumFlag != 0), ""); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_dictIDFlag, fparams.noDictIDFlag == 0), ""); -+ return 0; -+} -+ -+size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params) -+{ -+ DEBUGLOG(4, "ZSTD_CCtx_setParams"); -+ /* First check cParams, because we want to update all or none. */ -+ FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); -+ /* Next set fParams, because this could fail if the cctx isn't in init stage. */ -+ FORWARD_IF_ERROR(ZSTD_CCtx_setFParams(cctx, params.fParams), ""); -+ /* Finally set cParams, which should succeed. */ -+ FORWARD_IF_ERROR(ZSTD_CCtx_setCParams(cctx, params.cParams), ""); -+ return 0; -+} -+ - size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) - { -- DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); -+ DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %llu bytes", pledgedSrcSize); - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "Can't set pledgedSrcSize when not in init stage."); - cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; -@@ -1024,9 +1177,9 @@ static void ZSTD_dedicatedDictSearch_rev - ZSTD_compressionParameters* cParams); - - /* -- * Initializes the local dict using the requested parameters. -- * NOTE: This does not use the pledged src size, because it may be used for more -- * than one compression. -+ * Initializes the local dictionary using requested parameters. -+ * NOTE: Initialization does not employ the pledged src size, -+ * because the dictionary may be used for multiple compressions. - */ - static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) - { -@@ -1039,8 +1192,8 @@ static size_t ZSTD_initLocalDict(ZSTD_CC - return 0; - } - if (dl->cdict != NULL) { -- assert(cctx->cdict == dl->cdict); - /* Local dictionary already initialized. 
*/ -+ assert(cctx->cdict == dl->cdict); - return 0; - } - assert(dl->dictSize > 0); -@@ -1060,26 +1213,30 @@ static size_t ZSTD_initLocalDict(ZSTD_CC - } - - size_t ZSTD_CCtx_loadDictionary_advanced( -- ZSTD_CCtx* cctx, const void* dict, size_t dictSize, -- ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) -+ ZSTD_CCtx* cctx, -+ const void* dict, size_t dictSize, -+ ZSTD_dictLoadMethod_e dictLoadMethod, -+ ZSTD_dictContentType_e dictContentType) - { -- RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, -- "Can't load a dictionary when ctx is not in init stage."); - DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); -- ZSTD_clearAllDicts(cctx); /* in case one already exists */ -- if (dict == NULL || dictSize == 0) /* no dictionary mode */ -+ RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, -+ "Can't load a dictionary when cctx is not in init stage."); -+ ZSTD_clearAllDicts(cctx); /* erase any previously set dictionary */ -+ if (dict == NULL || dictSize == 0) /* no dictionary */ - return 0; - if (dictLoadMethod == ZSTD_dlm_byRef) { - cctx->localDict.dict = dict; - } else { -+ /* copy dictionary content inside CCtx to own its lifetime */ - void* dictBuffer; - RETURN_ERROR_IF(cctx->staticSize, memory_allocation, -- "no malloc for static CCtx"); -+ "static CCtx can't allocate for an internal copy of dictionary"); - dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem); -- RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!"); -+ RETURN_ERROR_IF(dictBuffer==NULL, memory_allocation, -+ "allocation failed for dictionary content"); - ZSTD_memcpy(dictBuffer, dict, dictSize); -- cctx->localDict.dictBuffer = dictBuffer; -- cctx->localDict.dict = dictBuffer; -+ cctx->localDict.dictBuffer = dictBuffer; /* owned ptr to free */ -+ cctx->localDict.dict = dictBuffer; /* read-only reference */ - } - cctx->localDict.dictSize = dictSize; - cctx->localDict.dictContentType = dictContentType; -@@ -1149,7 +1306,7 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, - if ( (reset == ZSTD_reset_parameters) - || (reset == ZSTD_reset_session_and_parameters) ) { - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, -- "Can't reset parameters only when not in init stage."); -+ "Reset parameters is only possible during init stage."); - ZSTD_clearAllDicts(cctx); - return ZSTD_CCtxParams_reset(&cctx->requestedParams); - } -@@ -1168,7 +1325,7 @@ size_t ZSTD_checkCParams(ZSTD_compressio - BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); - BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); - BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength); -- BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); -+ BOUNDCHECK(ZSTD_c_strategy, (int)cParams.strategy); - return 0; - } - -@@ -1178,11 +1335,12 @@ size_t ZSTD_checkCParams(ZSTD_compressio - static ZSTD_compressionParameters - ZSTD_clampCParams(ZSTD_compressionParameters cParams) - { --# define CLAMP_TYPE(cParam, val, type) { \ -- ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ -- if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \ -- else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \ -- } -+# define CLAMP_TYPE(cParam, val, type) \ -+ do { \ -+ ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ -+ if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \ -+ else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \ -+ } while (0) - # define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) - CLAMP(ZSTD_c_windowLog, cParams.windowLog); - CLAMP(ZSTD_c_chainLog, cParams.chainLog); -@@ -1240,19 +1398,62 @@ static U32 ZSTD_dictAndWindowLog(U32 win - * optimize `cPar`
for a specified input (`srcSize` and `dictSize`). - * mostly downsize to reduce memory consumption and initialization latency. - * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. -- * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`. -+ * `mode` is the mode for parameter adjustment. See docs for `ZSTD_CParamMode_e`. - * note : `srcSize==0` means 0! - * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ - static ZSTD_compressionParameters - ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, - unsigned long long srcSize, - size_t dictSize, -- ZSTD_cParamMode_e mode) -+ ZSTD_CParamMode_e mode, -+ ZSTD_ParamSwitch_e useRowMatchFinder) - { - const U64 minSrcSize = 513; /* (1<<9) + 1 */ - const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); - assert(ZSTD_checkCParams(cPar)==0); - -+ /* Cascade the selected strategy down to the next-highest one built into -+ * this binary. */ -+#ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR -+ if (cPar.strategy == ZSTD_btultra2) { -+ cPar.strategy = ZSTD_btultra; -+ } -+ if (cPar.strategy == ZSTD_btultra) { -+ cPar.strategy = ZSTD_btopt; -+ } -+#endif -+#ifdef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR -+ if (cPar.strategy == ZSTD_btopt) { -+ cPar.strategy = ZSTD_btlazy2; -+ } -+#endif -+#ifdef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR -+ if (cPar.strategy == ZSTD_btlazy2) { -+ cPar.strategy = ZSTD_lazy2; -+ } -+#endif -+#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -+ if (cPar.strategy == ZSTD_lazy2) { -+ cPar.strategy = ZSTD_lazy; -+ } -+#endif -+#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -+ if (cPar.strategy == ZSTD_lazy) { -+ cPar.strategy = ZSTD_greedy; -+ } -+#endif -+#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -+ if (cPar.strategy == ZSTD_greedy) { -+ cPar.strategy = ZSTD_dfast; -+ } -+#endif -+#ifdef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR -+ if (cPar.strategy == ZSTD_dfast) { -+ cPar.strategy = ZSTD_fast; -+ cPar.targetLength = 0; -+ } -+#endif -+ - switch (mode) { - case ZSTD_cpm_unknown: - case ZSTD_cpm_noAttachDict: -@@ -1281,8 +1482,8 @@ ZSTD_adjustCParams_internal(ZSTD_compres - } - - /* resize windowLog if input is small enough, to use less memory */ -- if ( (srcSize < maxWindowResize) -- && (dictSize < maxWindowResize) ) { -+ if ( (srcSize <= maxWindowResize) -+ && (dictSize <= maxWindowResize) ) { - U32 const tSize = (U32)(srcSize + dictSize); - static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; - U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN : -@@ -1300,6 +1501,42 @@ ZSTD_adjustCParams_internal(ZSTD_compres - if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) - cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ - -+ /* We can't use more than 32 bits of hash in total, so that means that we require: -+ * (hashLog + 8) <= 32 && (chainLog + 8) <= 32 -+ */ -+ if (mode == ZSTD_cpm_createCDict && ZSTD_CDictIndicesAreTagged(&cPar)) { -+ U32 const maxShortCacheHashLog = 32 - ZSTD_SHORT_CACHE_TAG_BITS; -+ if (cPar.hashLog > maxShortCacheHashLog) { -+ cPar.hashLog = maxShortCacheHashLog; -+ } -+ if (cPar.chainLog > maxShortCacheHashLog) { -+ cPar.chainLog = maxShortCacheHashLog; -+ } -+ } -+ -+ -+ /* At this point, we aren't 100% sure if we are using the row match finder. -+ * Unless it is explicitly disabled, conservatively assume that it is enabled. -+ * In this case it will only be disabled for small sources, so shrinking the -+ * hash log a little bit shouldn't result in any ratio loss. 
-+ */ -+ if (useRowMatchFinder == ZSTD_ps_auto) -+ useRowMatchFinder = ZSTD_ps_enable; -+ -+ /* We can't hash more than 32-bits in total. So that means that we require: -+ * (hashLog - rowLog + 8) <= 32 -+ */ -+ if (ZSTD_rowMatchFinderUsed(cPar.strategy, useRowMatchFinder)) { -+ /* Switch to 32-entry rows if searchLog is 5 (or more) */ -+ U32 const rowLog = BOUNDED(4, cPar.searchLog, 6); -+ U32 const maxRowHashLog = 32 - ZSTD_ROW_HASH_TAG_BITS; -+ U32 const maxHashLog = maxRowHashLog + rowLog; -+ assert(cPar.hashLog >= rowLog); -+ if (cPar.hashLog > maxHashLog) { -+ cPar.hashLog = maxHashLog; -+ } -+ } -+ - return cPar; - } - -@@ -1310,11 +1547,11 @@ ZSTD_adjustCParams(ZSTD_compressionParam - { - cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ - if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; -- return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown); -+ return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown, ZSTD_ps_auto); - } - --static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); --static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); -+static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode); -+static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode); - - static void ZSTD_overrideCParams( - ZSTD_compressionParameters* cParams, -@@ -1330,24 +1567,25 @@ static void ZSTD_overrideCParams( - } - - ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( -- const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) -+ const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode) - { - ZSTD_compressionParameters cParams; - if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { -- srcSizeHint = CCtxParams->srcSizeHint; -+ assert(CCtxParams->srcSizeHint>=0); -+ srcSizeHint = (U64)CCtxParams->srcSizeHint; - } - cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode); - if (CCtxParams->ldmParams.enableLdm == ZSTD_ps_enable) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; - ZSTD_overrideCParams(&cParams, &CCtxParams->cParams); - assert(!ZSTD_checkCParams(cParams)); - /* srcSizeHint == 0 means 0 */ -- return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode); -+ return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode, CCtxParams->useRowMatchFinder); - } - - static size_t - ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, -- const ZSTD_paramSwitch_e useRowMatchFinder, -- const U32 enableDedicatedDictSearch, -+ const ZSTD_ParamSwitch_e useRowMatchFinder, -+ const int enableDedicatedDictSearch, - const U32 forCCtx) - { - /* chain table size should be 0 for fast or row-hash strategies */ -@@ -1363,14 +1601,14 @@ ZSTD_sizeof_matchState(const ZSTD_compre - + hSize * sizeof(U32) - + h3Size * sizeof(U32); - size_t const optPotentialSpace = -- ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32)) -- + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32)) -- + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32)) -- + 
ZSTD_cwksp_aligned_alloc_size((1<strategy, useRowMatchFinder) -- ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16)) -+ ? ZSTD_cwksp_aligned64_alloc_size(hSize) - : 0; - size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt)) - ? optPotentialSpace -@@ -1386,30 +1624,38 @@ ZSTD_sizeof_matchState(const ZSTD_compre - return tableSpace + optSpace + slackSpace + lazyAdditionalSpace; - } - -+/* Helper function for calculating memory requirements. -+ * Gives a tighter bound than ZSTD_sequenceBound() by taking minMatch into account. */ -+static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useSequenceProducer) { -+ U32 const divider = (minMatch==3 || useSequenceProducer) ? 3 : 4; -+ return blockSize / divider; -+} -+ - static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( - const ZSTD_compressionParameters* cParams, - const ldmParams_t* ldmParams, - const int isStatic, -- const ZSTD_paramSwitch_e useRowMatchFinder, -+ const ZSTD_ParamSwitch_e useRowMatchFinder, - const size_t buffInSize, - const size_t buffOutSize, -- const U64 pledgedSrcSize) -+ const U64 pledgedSrcSize, -+ int useSequenceProducer, -+ size_t maxBlockSize) - { - size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize); -- size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); -- U32 const divider = (cParams->minMatch==3) ? 3 : 4; -- size_t const maxNbSeq = blockSize / divider; -+ size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(maxBlockSize), windowSize); -+ size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useSequenceProducer); - size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) -- + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef)) -+ + ZSTD_cwksp_aligned64_alloc_size(maxNbSeq * sizeof(SeqDef)) - + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); -- size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE); -+ size_t const tmpWorkSpace = ZSTD_cwksp_alloc_size(TMP_WORKSPACE_SIZE); - size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); - size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1); - - size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams); - size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize); - size_t const ldmSeqSpace = ldmParams->enableLdm == ZSTD_ps_enable ? -- ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0; -+ ZSTD_cwksp_aligned64_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0; - - - size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) -@@ -1417,15 +1663,21 @@ static size_t ZSTD_estimateCCtxSize_usin - - size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; - -+ size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize); -+ size_t const externalSeqSpace = useSequenceProducer -+ ? 
ZSTD_cwksp_aligned64_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence)) -+ : 0; -+ - size_t const neededSpace = - cctxSpace + -- entropySpace + -+ tmpWorkSpace + - blockStateSpace + - ldmSpace + - ldmSeqSpace + - matchStateSize + - tokenSpace + -- bufferSpace; -+ bufferSpace + -+ externalSeqSpace; - - DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); - return neededSpace; -@@ -1435,7 +1687,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxPa - { - ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); -- ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, -+ ZSTD_ParamSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, - &cParams); - - RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); -@@ -1443,7 +1695,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxPa - * be needed. However, we still allocate two 0-sized buffers, which can - * take space under ASAN. */ - return ZSTD_estimateCCtxSize_usingCCtxParams_internal( -- &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); -+ &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize); - } - - size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) -@@ -1493,18 +1745,18 @@ size_t ZSTD_estimateCStreamSize_usingCCt - RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); - { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); -- size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); -+ size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(params->maxBlockSize), (size_t)1 << cParams.windowLog); - size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered) - ? ((size_t)1 << cParams.windowLog) + blockSize - : 0; - size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered) - ? ZSTD_compressBound(blockSize) + 1 - : 0; -- ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, &params->cParams); -+ ZSTD_ParamSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, &params->cParams); - - return ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize, -- ZSTD_CONTENTSIZE_UNKNOWN); -+ ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize); - } - } - -@@ -1600,7 +1852,7 @@ void ZSTD_reset_compressedBlockState(ZST - * Invalidate all the matches in the match finder tables. - * Requires nextSrc and base to be set (can be NULL). 
- */ --static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) -+static void ZSTD_invalidateMatchState(ZSTD_MatchState_t* ms) - { - ZSTD_window_clear(&ms->window); - -@@ -1637,12 +1889,25 @@ typedef enum { - ZSTD_resetTarget_CCtx - } ZSTD_resetTarget_e; - -+/* Mixes bits in a 64 bits in a value, based on XXH3_rrmxmx */ -+static U64 ZSTD_bitmix(U64 val, U64 len) { -+ val ^= ZSTD_rotateRight_U64(val, 49) ^ ZSTD_rotateRight_U64(val, 24); -+ val *= 0x9FB21C651E98DF25ULL; -+ val ^= (val >> 35) + len ; -+ val *= 0x9FB21C651E98DF25ULL; -+ return val ^ (val >> 28); -+} -+ -+/* Mixes in the hashSalt and hashSaltEntropy to create a new hashSalt */ -+static void ZSTD_advanceHashSalt(ZSTD_MatchState_t* ms) { -+ ms->hashSalt = ZSTD_bitmix(ms->hashSalt, 8) ^ ZSTD_bitmix((U64) ms->hashSaltEntropy, 4); -+} - - static size_t --ZSTD_reset_matchState(ZSTD_matchState_t* ms, -+ZSTD_reset_matchState(ZSTD_MatchState_t* ms, - ZSTD_cwksp* ws, - const ZSTD_compressionParameters* cParams, -- const ZSTD_paramSwitch_e useRowMatchFinder, -+ const ZSTD_ParamSwitch_e useRowMatchFinder, - const ZSTD_compResetPolicy_e crp, - const ZSTD_indexResetPolicy_e forceResetIndex, - const ZSTD_resetTarget_e forWho) -@@ -1664,6 +1929,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* - } - - ms->hashLog3 = hashLog3; -+ ms->lazySkipping = 0; - - ZSTD_invalidateMatchState(ms); - -@@ -1685,22 +1951,19 @@ ZSTD_reset_matchState(ZSTD_matchState_t* - ZSTD_cwksp_clean_tables(ws); - } - -- /* opt parser space */ -- if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { -- DEBUGLOG(4, "reserving optimal parser space"); -- ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned)); -- ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); -- ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); -- ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); -- ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)); -- ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); -- } -- - if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) { -- { /* Row match finder needs an additional table of hashes ("tags") */ -- size_t const tagTableSize = hSize*sizeof(U16); -- ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize); -- if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize); -+ /* Row match finder needs an additional table of hashes ("tags") */ -+ size_t const tagTableSize = hSize; -+ /* We want to generate a new salt in case we reset a Cctx, but we always want to use -+ * 0 when we reset a Cdict */ -+ if(forWho == ZSTD_resetTarget_CCtx) { -+ ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned_init_once(ws, tagTableSize); -+ ZSTD_advanceHashSalt(ms); -+ } else { -+ /* When we are not salting we want to always memset the memory */ -+ ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned64(ws, tagTableSize); -+ ZSTD_memset(ms->tagTable, 0, tagTableSize); -+ ms->hashSalt = 0; - } - { /* Switch to 32-entry rows if searchLog is 5 (or more) */ - U32 const rowLog = BOUNDED(4, cParams->searchLog, 6); -@@ -1709,6 +1972,17 @@ ZSTD_reset_matchState(ZSTD_matchState_t* - } - } - -+ /* opt parser space */ -+ if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { -+ DEBUGLOG(4, "reserving optimal parser space"); -+ ms->opt.litFreq = 
(unsigned*)ZSTD_cwksp_reserve_aligned64(ws, (1<<Litbits) * sizeof(unsigned)); -+ ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned64(ws, (MaxLL+1) * sizeof(unsigned)); -+ ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned64(ws, (MaxML+1) * sizeof(unsigned)); -+ ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned64(ws, (MaxOff+1) * sizeof(unsigned)); -+ ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned64(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_match_t)); -+ ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned64(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t)); -+ } -+ - ms->cParams = *cParams; - - RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, -@@ -1754,7 +2028,7 @@ static size_t ZSTD_resetCCtx_internal(ZS - { - ZSTD_cwksp* const ws = &zc->workspace; - DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d", -- (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->useBlockSplitter); -+ (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->postBlockSplitter); - assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); - - zc->isFirstBlock = 1; -@@ -1766,8 +2040,9 @@ static size_t ZSTD_resetCCtx_internal(ZS - params = &zc->appliedParams; - - assert(params->useRowMatchFinder != ZSTD_ps_auto); -- assert(params->useBlockSplitter != ZSTD_ps_auto); -+ assert(params->postBlockSplitter != ZSTD_ps_auto); - assert(params->ldmParams.enableLdm != ZSTD_ps_auto); -+ assert(params->maxBlockSize != 0); - if (params->ldmParams.enableLdm == ZSTD_ps_enable) { - /* Adjust long distance matching parameters */ - ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams); -@@ -1776,9 +2051,8 @@ static size_t ZSTD_resetCCtx_internal(ZS - } - - { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize)); -- size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); -- U32 const divider = (params->cParams.minMatch==3) ? 3 : 4; -- size_t const maxNbSeq = blockSize / divider; -+ size_t const blockSize = MIN(params->maxBlockSize, windowSize); -+ size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, ZSTD_hasExtSeqProd(params)); - size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered) - ? ZSTD_compressBound(blockSize) + 1 - : 0; -@@ -1795,8 +2069,7 @@ static size_t ZSTD_resetCCtx_internal(ZS - size_t const neededSpace = - ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder, -- buffInSize, buffOutSize, pledgedSrcSize); -- int resizeWorkspace; -+ buffInSize, buffOutSize, pledgedSrcSize, ZSTD_hasExtSeqProd(params), params->maxBlockSize); - - FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); - -@@ -1805,7 +2078,7 @@ static size_t ZSTD_resetCCtx_internal(ZS - { /* Check if workspace is large enough, alloc a new one if needed */ - int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; - int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); -- resizeWorkspace = workspaceTooSmall || workspaceWasteful; -+ int resizeWorkspace = workspaceTooSmall || workspaceWasteful; - DEBUGLOG(4, "Need %zu B workspace", neededSpace); - DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); - -@@ -1823,21 +2096,23 @@ static size_t ZSTD_resetCCtx_internal(ZS - - DEBUGLOG(5, "reserving object space"); - /* Statically sized space. 
-- * entropyWorkspace never moves, -+ * tmpWorkspace never moves, - * though prev/next block swap places */ - assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t))); - zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); - RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock"); - zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); - RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); -- zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE); -- RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); -+ zc->tmpWorkspace = ZSTD_cwksp_reserve_object(ws, TMP_WORKSPACE_SIZE); -+ RETURN_ERROR_IF(zc->tmpWorkspace == NULL, memory_allocation, "couldn't allocate tmpWorkspace"); -+ zc->tmpWkspSize = TMP_WORKSPACE_SIZE; - } } - - ZSTD_cwksp_clear(ws); - - /* init params */ - zc->blockState.matchState.cParams = params->cParams; -+ zc->blockState.matchState.prefetchCDictTables = params->prefetchCDictTables == ZSTD_ps_enable; - zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; - zc->consumedSrcSize = 0; - zc->producedCSize = 0; -@@ -1845,7 +2120,7 @@ static size_t ZSTD_resetCCtx_internal(ZS - zc->appliedParams.fParams.contentSizeFlag = 0; - DEBUGLOG(4, "pledged content size : %u ; flag : %u", - (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); -- zc->blockSize = blockSize; -+ zc->blockSizeMax = blockSize; - - xxh64_reset(&zc->xxhState, 0); - zc->stage = ZSTDcs_init; -@@ -1854,13 +2129,46 @@ static size_t ZSTD_resetCCtx_internal(ZS - - ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); - -+ FORWARD_IF_ERROR(ZSTD_reset_matchState( -+ &zc->blockState.matchState, -+ ws, -+ &params->cParams, -+ params->useRowMatchFinder, -+ crp, -+ needsIndexReset, -+ ZSTD_resetTarget_CCtx), ""); -+ -+ zc->seqStore.sequencesStart = (SeqDef*)ZSTD_cwksp_reserve_aligned64(ws, maxNbSeq * sizeof(SeqDef)); -+ -+ /* ldm hash table */ -+ if (params->ldmParams.enableLdm == ZSTD_ps_enable) { -+ /* TODO: avoid memset? */ -+ size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog; -+ zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned64(ws, ldmHSize * sizeof(ldmEntry_t)); -+ ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); -+ zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned64(ws, maxNbLdmSeq * sizeof(rawSeq)); -+ zc->maxNbLdmSequences = maxNbLdmSeq; -+ -+ ZSTD_window_init(&zc->ldmState.window); -+ zc->ldmState.loadedDictEnd = 0; -+ } -+ -+ /* reserve space for block-level external sequences */ -+ if (ZSTD_hasExtSeqProd(params)) { -+ size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize); -+ zc->extSeqBufCapacity = maxNbExternalSeq; -+ zc->extSeqBuf = -+ (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned64(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence)); -+ } -+ -+ /* buffers */ -+ - /* ZSTD_wildcopy() is used to copy into the literals buffer, - * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. 
- */ - zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH); - zc->seqStore.maxNbLit = blockSize; - -- /* buffers */ - zc->bufferedPolicy = zbuff; - zc->inBuffSize = buffInSize; - zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize); -@@ -1883,32 +2191,9 @@ static size_t ZSTD_resetCCtx_internal(ZS - zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); - zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); - zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); -- zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); -- -- FORWARD_IF_ERROR(ZSTD_reset_matchState( -- &zc->blockState.matchState, -- ws, -- &params->cParams, -- params->useRowMatchFinder, -- crp, -- needsIndexReset, -- ZSTD_resetTarget_CCtx), ""); -- -- /* ldm hash table */ -- if (params->ldmParams.enableLdm == ZSTD_ps_enable) { -- /* TODO: avoid memset? */ -- size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog; -- zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); -- ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); -- zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); -- zc->maxNbLdmSequences = maxNbLdmSeq; -- -- ZSTD_window_init(&zc->ldmState.window); -- zc->ldmState.loadedDictEnd = 0; -- } - - DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); -- assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace)); -+ assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace)); - - zc->initialized = 1; - -@@ -1980,7 +2265,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCt - } - - params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, -- cdict->dictContentSize, ZSTD_cpm_attachDict); -+ cdict->dictContentSize, ZSTD_cpm_attachDict, -+ params.useRowMatchFinder); - params.cParams.windowLog = windowLog; - params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */ - FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize, -@@ -2019,6 +2305,22 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCt - return 0; - } - -+static void ZSTD_copyCDictTableIntoCCtx(U32* dst, U32 const* src, size_t tableSize, -+ ZSTD_compressionParameters const* cParams) { -+ if (ZSTD_CDictIndicesAreTagged(cParams)){ -+ /* Remove tags from the CDict table if they are present. -+ * See docs on "short cache" in zstd_compress_internal.h for context. 
*/ -+ size_t i; -+ for (i = 0; i < tableSize; i++) { -+ U32 const taggedIndex = src[i]; -+ U32 const index = taggedIndex >> ZSTD_SHORT_CACHE_TAG_BITS; -+ dst[i] = index; -+ } -+ } else { -+ ZSTD_memcpy(dst, src, tableSize * sizeof(U32)); -+ } -+} -+ - static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, - const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, -@@ -2054,26 +2356,29 @@ static size_t ZSTD_resetCCtx_byCopyingCD - : 0; - size_t const hSize = (size_t)1 << cdict_cParams->hashLog; - -- ZSTD_memcpy(cctx->blockState.matchState.hashTable, -- cdict->matchState.hashTable, -- hSize * sizeof(U32)); -+ ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.hashTable, -+ cdict->matchState.hashTable, -+ hSize, cdict_cParams); -+ - /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */ - if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) { -- ZSTD_memcpy(cctx->blockState.matchState.chainTable, -- cdict->matchState.chainTable, -- chainSize * sizeof(U32)); -+ ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.chainTable, -+ cdict->matchState.chainTable, -+ chainSize, cdict_cParams); - } - /* copy tag table */ - if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) { -- size_t const tagTableSize = hSize*sizeof(U16); -+ size_t const tagTableSize = hSize; - ZSTD_memcpy(cctx->blockState.matchState.tagTable, -- cdict->matchState.tagTable, -- tagTableSize); -+ cdict->matchState.tagTable, -+ tagTableSize); -+ cctx->blockState.matchState.hashSalt = cdict->matchState.hashSalt; - } - } - - /* Zero the hashTable3, since the cdict never fills it */ -- { int const h3log = cctx->blockState.matchState.hashLog3; -+ assert(cctx->blockState.matchState.hashLog3 <= 31); -+ { U32 const h3log = cctx->blockState.matchState.hashLog3; - size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; - assert(cdict->matchState.hashLog3 == 0); - ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); -@@ -2082,8 +2387,8 @@ static size_t ZSTD_resetCCtx_byCopyingCD - ZSTD_cwksp_mark_tables_clean(&cctx->workspace); - - /* copy dictionary offsets */ -- { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; -- ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; -+ { ZSTD_MatchState_t const* srcMatchState = &cdict->matchState; -+ ZSTD_MatchState_t* dstMatchState = &cctx->blockState.matchState; - dstMatchState->window = srcMatchState->window; - dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; - dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; -@@ -2141,12 +2446,13 @@ static size_t ZSTD_copyCCtx_internal(ZST - /* Copy only compression parameters related to tables. 
*/ - params.cParams = srcCCtx->appliedParams.cParams; - assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_ps_auto); -- assert(srcCCtx->appliedParams.useBlockSplitter != ZSTD_ps_auto); -+ assert(srcCCtx->appliedParams.postBlockSplitter != ZSTD_ps_auto); - assert(srcCCtx->appliedParams.ldmParams.enableLdm != ZSTD_ps_auto); - params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder; -- params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter; -+ params.postBlockSplitter = srcCCtx->appliedParams.postBlockSplitter; - params.ldmParams = srcCCtx->appliedParams.ldmParams; - params.fParams = fParams; -+ params.maxBlockSize = srcCCtx->appliedParams.maxBlockSize; - ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize, - /* loadedDictSize */ 0, - ZSTDcrp_leaveDirty, zbuff); -@@ -2166,7 +2472,7 @@ static size_t ZSTD_copyCCtx_internal(ZST - ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog) - : 0; - size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; -- int const h3log = srcCCtx->blockState.matchState.hashLog3; -+ U32 const h3log = srcCCtx->blockState.matchState.hashLog3; - size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; - - ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable, -@@ -2184,8 +2490,8 @@ static size_t ZSTD_copyCCtx_internal(ZST - - /* copy dictionary offsets */ - { -- const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; -- ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; -+ const ZSTD_MatchState_t* srcMatchState = &srcCCtx->blockState.matchState; -+ ZSTD_MatchState_t* dstMatchState = &dstCCtx->blockState.matchState; - dstMatchState->window = srcMatchState->window; - dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; - dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; -@@ -2234,7 +2540,7 @@ ZSTD_reduceTable_internal (U32* const ta - /* Protect special index values < ZSTD_WINDOW_START_INDEX. */ - U32 const reducerThreshold = reducerValue + ZSTD_WINDOW_START_INDEX; - assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ -- assert(size < (1U<<31)); /* can be casted to int */ -+ assert(size < (1U<<31)); /* can be cast to int */ - - - for (rowNb=0 ; rowNb < nbRows ; rowNb++) { -@@ -2267,7 +2573,7 @@ static void ZSTD_reduceTable_btlazy2(U32 - - /*! 
ZSTD_reduceIndex() : - * rescale all indexes to avoid future overflow (indexes are U32) */ --static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue) -+static void ZSTD_reduceIndex (ZSTD_MatchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue) - { - { U32 const hSize = (U32)1 << params->cParams.hashLog; - ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); -@@ -2294,26 +2600,32 @@ static void ZSTD_reduceIndex (ZSTD_match - - /* See doc/zstd_compression_format.md for detailed format description */ - --void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) -+int ZSTD_seqToCodes(const SeqStore_t* seqStorePtr) - { -- const seqDef* const sequences = seqStorePtr->sequencesStart; -+ const SeqDef* const sequences = seqStorePtr->sequencesStart; - BYTE* const llCodeTable = seqStorePtr->llCode; - BYTE* const ofCodeTable = seqStorePtr->ofCode; - BYTE* const mlCodeTable = seqStorePtr->mlCode; - U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - U32 u; -+ int longOffsets = 0; - assert(nbSeq <= seqStorePtr->maxNbSeq); - for (u=0; u= STREAM_ACCUMULATOR_MIN)); -+ if (MEM_32bits() && ofCode >= STREAM_ACCUMULATOR_MIN) -+ longOffsets = 1; - } - if (seqStorePtr->longLengthType==ZSTD_llt_literalLength) - llCodeTable[seqStorePtr->longLengthPos] = MaxLL; - if (seqStorePtr->longLengthType==ZSTD_llt_matchLength) - mlCodeTable[seqStorePtr->longLengthPos] = MaxML; -+ return longOffsets; - } - - /* ZSTD_useTargetCBlockSize(): -@@ -2333,9 +2645,9 @@ static int ZSTD_useTargetCBlockSize(cons - * Returns 1 if true, 0 otherwise. */ - static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams) - { -- DEBUGLOG(5, "ZSTD_blockSplitterEnabled (useBlockSplitter=%d)", cctxParams->useBlockSplitter); -- assert(cctxParams->useBlockSplitter != ZSTD_ps_auto); -- return (cctxParams->useBlockSplitter == ZSTD_ps_enable); -+ DEBUGLOG(5, "ZSTD_blockSplitterEnabled (postBlockSplitter=%d)", cctxParams->postBlockSplitter); -+ assert(cctxParams->postBlockSplitter != ZSTD_ps_auto); -+ return (cctxParams->postBlockSplitter == ZSTD_ps_enable); - } - - /* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types -@@ -2347,6 +2659,7 @@ typedef struct { - U32 MLtype; - size_t size; - size_t lastCountSize; /* Accounts for bug in 1.3.4. 
More detail in ZSTD_entropyCompressSeqStore_internal() */ -+ int longOffsets; - } ZSTD_symbolEncodingTypeStats_t; - - /* ZSTD_buildSequencesStatistics(): -@@ -2357,11 +2670,13 @@ typedef struct { - * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32) - */ - static ZSTD_symbolEncodingTypeStats_t --ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, -- const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, -- BYTE* dst, const BYTE* const dstEnd, -- ZSTD_strategy strategy, unsigned* countWorkspace, -- void* entropyWorkspace, size_t entropyWkspSize) { -+ZSTD_buildSequencesStatistics( -+ const SeqStore_t* seqStorePtr, size_t nbSeq, -+ const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, -+ BYTE* dst, const BYTE* const dstEnd, -+ ZSTD_strategy strategy, unsigned* countWorkspace, -+ void* entropyWorkspace, size_t entropyWkspSize) -+{ - BYTE* const ostart = dst; - const BYTE* const oend = dstEnd; - BYTE* op = ostart; -@@ -2375,7 +2690,7 @@ ZSTD_buildSequencesStatistics(seqStore_t - - stats.lastCountSize = 0; - /* convert length/distances into codes */ -- ZSTD_seqToCodes(seqStorePtr); -+ stats.longOffsets = ZSTD_seqToCodes(seqStorePtr); - assert(op <= oend); - assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */ - /* build CTable for Literal Lengths */ -@@ -2392,7 +2707,7 @@ ZSTD_buildSequencesStatistics(seqStore_t - assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), -- CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype, -+ CTable_LitLength, LLFSELog, (SymbolEncodingType_e)stats.LLtype, - countWorkspace, max, llCodeTable, nbSeq, - LL_defaultNorm, LL_defaultNormLog, MaxLL, - prevEntropy->litlengthCTable, -@@ -2413,7 +2728,7 @@ ZSTD_buildSequencesStatistics(seqStore_t - size_t const mostFrequent = HIST_countFast_wksp( - countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ -- ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; -+ ZSTD_DefaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; - DEBUGLOG(5, "Building OF table"); - nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; - stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, -@@ -2424,7 +2739,7 @@ ZSTD_buildSequencesStatistics(seqStore_t - assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), -- CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype, -+ CTable_OffsetBits, OffFSELog, (SymbolEncodingType_e)stats.Offtype, - countWorkspace, max, ofCodeTable, nbSeq, - OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, - prevEntropy->offcodeCTable, -@@ -2454,7 +2769,7 @@ ZSTD_buildSequencesStatistics(seqStore_t - assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), -- CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype, -+ CTable_MatchLength, MLFSELog, (SymbolEncodingType_e)stats.MLtype, - countWorkspace, max, mlCodeTable, nbSeq, - ML_defaultNorm, ML_defaultNormLog, MaxML, - prevEntropy->matchlengthCTable, -@@ -2480,22 +2795,23 @@ ZSTD_buildSequencesStatistics(seqStore_t - */ - #define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20 - MEM_STATIC size_t --ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr, -- const ZSTD_entropyCTables_t* prevEntropy, -- ZSTD_entropyCTables_t* nextEntropy, -- const ZSTD_CCtx_params* cctxParams, -- void* dst, size_t dstCapacity, -- void* entropyWorkspace, size_t entropyWkspSize, -- const int bmi2) -+ZSTD_entropyCompressSeqStore_internal( -+ void* dst, size_t dstCapacity, -+ const void* literals, size_t litSize, -+ const SeqStore_t* seqStorePtr, -+ const ZSTD_entropyCTables_t* prevEntropy, -+ ZSTD_entropyCTables_t* nextEntropy, -+ const ZSTD_CCtx_params* cctxParams, -+ void* entropyWorkspace, size_t entropyWkspSize, -+ const int bmi2) - { -- const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; - ZSTD_strategy const strategy = cctxParams->cParams.strategy; - unsigned* count = (unsigned*)entropyWorkspace; - FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; - FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; - FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; -- const seqDef* const sequences = seqStorePtr->sequencesStart; -- const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; -+ const SeqDef* const sequences = seqStorePtr->sequencesStart; -+ const size_t nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - const BYTE* const ofCodeTable = seqStorePtr->ofCode; - const BYTE* const llCodeTable = seqStorePtr->llCode; - const BYTE* const mlCodeTable = seqStorePtr->mlCode; -@@ -2503,29 +2819,28 @@ ZSTD_entropyCompressSeqStore_internal(se - BYTE* const oend = ostart + dstCapacity; - BYTE* op = ostart; - size_t lastCountSize; -+ int longOffsets = 0; - - entropyWorkspace = count + (MaxSeq + 1); - entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); - -- DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq); -+ DEBUGLOG(5, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu, dstCapacity=%zu)", nbSeq, dstCapacity); - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); - assert(entropyWkspSize >= HUF_WORKSPACE_SIZE); - - /* Compress literals */ -- { const BYTE* const literals = seqStorePtr->litStart; -- size_t const 
numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart; -- size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart; -+ { size_t const numSequences = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - /* Base suspicion of uncompressibility on ratio of literals to sequences */ -- unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO); -- size_t const litSize = (size_t)(seqStorePtr->lit - literals); -+ int const suspectUncompressible = (numSequences == 0) || (litSize / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO); -+ - size_t const cSize = ZSTD_compressLiterals( -- &prevEntropy->huf, &nextEntropy->huf, -- cctxParams->cParams.strategy, -- ZSTD_literalsCompressionIsDisabled(cctxParams), - op, dstCapacity, - literals, litSize, - entropyWorkspace, entropyWkspSize, -- bmi2, suspectUncompressible); -+ &prevEntropy->huf, &nextEntropy->huf, -+ cctxParams->cParams.strategy, -+ ZSTD_literalsCompressionIsDisabled(cctxParams), -+ suspectUncompressible, bmi2); - FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); - assert(cSize <= dstCapacity); - op += cSize; -@@ -2551,11 +2866,10 @@ ZSTD_entropyCompressSeqStore_internal(se - ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); - return (size_t)(op - ostart); - } -- { -- ZSTD_symbolEncodingTypeStats_t stats; -- BYTE* seqHead = op++; -+ { BYTE* const seqHead = op++; - /* build stats for sequences */ -- stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, -+ const ZSTD_symbolEncodingTypeStats_t stats = -+ ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, - &prevEntropy->fse, &nextEntropy->fse, - op, oend, - strategy, count, -@@ -2564,6 +2878,7 @@ ZSTD_entropyCompressSeqStore_internal(se - *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2)); - lastCountSize = stats.lastCountSize; - op += stats.size; -+ longOffsets = stats.longOffsets; - } - - { size_t const bitstreamSize = ZSTD_encodeSequences( -@@ -2597,104 +2912,146 @@ ZSTD_entropyCompressSeqStore_internal(se - return (size_t)(op - ostart); - } - --MEM_STATIC size_t --ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr, -- const ZSTD_entropyCTables_t* prevEntropy, -- ZSTD_entropyCTables_t* nextEntropy, -- const ZSTD_CCtx_params* cctxParams, -- void* dst, size_t dstCapacity, -- size_t srcSize, -- void* entropyWorkspace, size_t entropyWkspSize, -- int bmi2) -+static size_t -+ZSTD_entropyCompressSeqStore_wExtLitBuffer( -+ void* dst, size_t dstCapacity, -+ const void* literals, size_t litSize, -+ size_t blockSize, -+ const SeqStore_t* seqStorePtr, -+ const ZSTD_entropyCTables_t* prevEntropy, -+ ZSTD_entropyCTables_t* nextEntropy, -+ const ZSTD_CCtx_params* cctxParams, -+ void* entropyWorkspace, size_t entropyWkspSize, -+ int bmi2) - { - size_t const cSize = ZSTD_entropyCompressSeqStore_internal( -- seqStorePtr, prevEntropy, nextEntropy, cctxParams, - dst, dstCapacity, -+ literals, litSize, -+ seqStorePtr, prevEntropy, nextEntropy, cctxParams, - entropyWorkspace, entropyWkspSize, bmi2); - if (cSize == 0) return 0; - /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. - * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. 
- */ -- if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) -+ if ((cSize == ERROR(dstSize_tooSmall)) & (blockSize <= dstCapacity)) { -+ DEBUGLOG(4, "not enough dstCapacity (%zu) for ZSTD_entropyCompressSeqStore_internal()=> do not compress block", dstCapacity); - return 0; /* block not compressed */ -+ } - FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed"); - - /* Check compressibility */ -- { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); -+ { size_t const maxCSize = blockSize - ZSTD_minGain(blockSize, cctxParams->cParams.strategy); - if (cSize >= maxCSize) return 0; /* block not compressed */ - } -- DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize); -+ DEBUGLOG(5, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize); -+ /* libzstd decoder before > v1.5.4 is not compatible with compressed blocks of size ZSTD_BLOCKSIZE_MAX exactly. -+ * This restriction is indirectly already fulfilled by respecting ZSTD_minGain() condition above. -+ */ -+ assert(cSize < ZSTD_BLOCKSIZE_MAX); - return cSize; - } - -+static size_t -+ZSTD_entropyCompressSeqStore( -+ const SeqStore_t* seqStorePtr, -+ const ZSTD_entropyCTables_t* prevEntropy, -+ ZSTD_entropyCTables_t* nextEntropy, -+ const ZSTD_CCtx_params* cctxParams, -+ void* dst, size_t dstCapacity, -+ size_t srcSize, -+ void* entropyWorkspace, size_t entropyWkspSize, -+ int bmi2) -+{ -+ return ZSTD_entropyCompressSeqStore_wExtLitBuffer( -+ dst, dstCapacity, -+ seqStorePtr->litStart, (size_t)(seqStorePtr->lit - seqStorePtr->litStart), -+ srcSize, -+ seqStorePtr, -+ prevEntropy, nextEntropy, -+ cctxParams, -+ entropyWorkspace, entropyWkspSize, -+ bmi2); -+} -+ - /* ZSTD_selectBlockCompressor() : - * Not static, but internal use only (used by long distance matcher) - * assumption : strat is a valid strategy */ --ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e useRowMatchFinder, ZSTD_dictMode_e dictMode) -+ZSTD_BlockCompressor_f ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_ParamSwitch_e useRowMatchFinder, ZSTD_dictMode_e dictMode) - { -- static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { -+ static const ZSTD_BlockCompressor_f blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { - { ZSTD_compressBlock_fast /* default for 0 */, - ZSTD_compressBlock_fast, -- ZSTD_compressBlock_doubleFast, -- ZSTD_compressBlock_greedy, -- ZSTD_compressBlock_lazy, -- ZSTD_compressBlock_lazy2, -- ZSTD_compressBlock_btlazy2, -- ZSTD_compressBlock_btopt, -- ZSTD_compressBlock_btultra, -- ZSTD_compressBlock_btultra2 }, -+ ZSTD_COMPRESSBLOCK_DOUBLEFAST, -+ ZSTD_COMPRESSBLOCK_GREEDY, -+ ZSTD_COMPRESSBLOCK_LAZY, -+ ZSTD_COMPRESSBLOCK_LAZY2, -+ ZSTD_COMPRESSBLOCK_BTLAZY2, -+ ZSTD_COMPRESSBLOCK_BTOPT, -+ ZSTD_COMPRESSBLOCK_BTULTRA, -+ ZSTD_COMPRESSBLOCK_BTULTRA2 -+ }, - { ZSTD_compressBlock_fast_extDict /* default for 0 */, - ZSTD_compressBlock_fast_extDict, -- ZSTD_compressBlock_doubleFast_extDict, -- ZSTD_compressBlock_greedy_extDict, -- ZSTD_compressBlock_lazy_extDict, -- ZSTD_compressBlock_lazy2_extDict, -- ZSTD_compressBlock_btlazy2_extDict, -- ZSTD_compressBlock_btopt_extDict, -- ZSTD_compressBlock_btultra_extDict, -- ZSTD_compressBlock_btultra_extDict }, -+ ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT, -+ ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT, -+ ZSTD_COMPRESSBLOCK_LAZY_EXTDICT, -+ ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT, -+ ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT, -+ ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT, -+ ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT, -+ 
ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT -+ }, - { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, - ZSTD_compressBlock_fast_dictMatchState, -- ZSTD_compressBlock_doubleFast_dictMatchState, -- ZSTD_compressBlock_greedy_dictMatchState, -- ZSTD_compressBlock_lazy_dictMatchState, -- ZSTD_compressBlock_lazy2_dictMatchState, -- ZSTD_compressBlock_btlazy2_dictMatchState, -- ZSTD_compressBlock_btopt_dictMatchState, -- ZSTD_compressBlock_btultra_dictMatchState, -- ZSTD_compressBlock_btultra_dictMatchState }, -+ ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE, -+ ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE, -+ ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE, -+ ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE, -+ ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE, -+ ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE, -+ ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE, -+ ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE -+ }, - { NULL /* default for 0 */, - NULL, - NULL, -- ZSTD_compressBlock_greedy_dedicatedDictSearch, -- ZSTD_compressBlock_lazy_dedicatedDictSearch, -- ZSTD_compressBlock_lazy2_dedicatedDictSearch, -+ ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH, -+ ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH, -+ ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH, - NULL, - NULL, - NULL, - NULL } - }; -- ZSTD_blockCompressor selectedCompressor; -+ ZSTD_BlockCompressor_f selectedCompressor; - ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); - -- assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); -- DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder); -+ assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat)); -+ DEBUGLOG(5, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder); - if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) { -- static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = { -- { ZSTD_compressBlock_greedy_row, -- ZSTD_compressBlock_lazy_row, -- ZSTD_compressBlock_lazy2_row }, -- { ZSTD_compressBlock_greedy_extDict_row, -- ZSTD_compressBlock_lazy_extDict_row, -- ZSTD_compressBlock_lazy2_extDict_row }, -- { ZSTD_compressBlock_greedy_dictMatchState_row, -- ZSTD_compressBlock_lazy_dictMatchState_row, -- ZSTD_compressBlock_lazy2_dictMatchState_row }, -- { ZSTD_compressBlock_greedy_dedicatedDictSearch_row, -- ZSTD_compressBlock_lazy_dedicatedDictSearch_row, -- ZSTD_compressBlock_lazy2_dedicatedDictSearch_row } -+ static const ZSTD_BlockCompressor_f rowBasedBlockCompressors[4][3] = { -+ { -+ ZSTD_COMPRESSBLOCK_GREEDY_ROW, -+ ZSTD_COMPRESSBLOCK_LAZY_ROW, -+ ZSTD_COMPRESSBLOCK_LAZY2_ROW -+ }, -+ { -+ ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW, -+ ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW, -+ ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW -+ }, -+ { -+ ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW, -+ ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW, -+ ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW -+ }, -+ { -+ ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW, -+ ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW, -+ ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW -+ } - }; -- DEBUGLOG(4, "Selecting a row-based matchfinder"); -+ DEBUGLOG(5, "Selecting a row-based matchfinder"); - assert(useRowMatchFinder != ZSTD_ps_auto); - selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy]; - } else { -@@ -2704,30 +3061,126 @@ ZSTD_blockCompressor ZSTD_selectBlockCom - return selectedCompressor; - } - --static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, -+static void 
ZSTD_storeLastLiterals(SeqStore_t* seqStorePtr, - const BYTE* anchor, size_t lastLLSize) - { - ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } - --void ZSTD_resetSeqStore(seqStore_t* ssPtr) -+void ZSTD_resetSeqStore(SeqStore_t* ssPtr) - { - ssPtr->lit = ssPtr->litStart; - ssPtr->sequences = ssPtr->sequencesStart; - ssPtr->longLengthType = ZSTD_llt_none; - } - --typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; -+/* ZSTD_postProcessSequenceProducerResult() : -+ * Validates and post-processes sequences obtained through the external matchfinder API: -+ * - Checks whether nbExternalSeqs represents an error condition. -+ * - Appends a block delimiter to outSeqs if one is not already present. -+ * See zstd.h for context regarding block delimiters. -+ * Returns the number of sequences after post-processing, or an error code. */ -+static size_t ZSTD_postProcessSequenceProducerResult( -+ ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, size_t srcSize -+) { -+ RETURN_ERROR_IF( -+ nbExternalSeqs > outSeqsCapacity, -+ sequenceProducer_failed, -+ "External sequence producer returned error code %lu", -+ (unsigned long)nbExternalSeqs -+ ); -+ -+ RETURN_ERROR_IF( -+ nbExternalSeqs == 0 && srcSize > 0, -+ sequenceProducer_failed, -+ "Got zero sequences from external sequence producer for a non-empty src buffer!" -+ ); -+ -+ if (srcSize == 0) { -+ ZSTD_memset(&outSeqs[0], 0, sizeof(ZSTD_Sequence)); -+ return 1; -+ } -+ -+ { -+ ZSTD_Sequence const lastSeq = outSeqs[nbExternalSeqs - 1]; -+ -+ /* We can return early if lastSeq is already a block delimiter. */ -+ if (lastSeq.offset == 0 && lastSeq.matchLength == 0) { -+ return nbExternalSeqs; -+ } -+ -+ /* This error condition is only possible if the external matchfinder -+ * produced an invalid parse, by definition of ZSTD_sequenceBound(). */ -+ RETURN_ERROR_IF( -+ nbExternalSeqs == outSeqsCapacity, -+ sequenceProducer_failed, -+ "nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!" -+ ); -+ -+ /* lastSeq is not a block delimiter, so we need to append one. */ -+ ZSTD_memset(&outSeqs[nbExternalSeqs], 0, sizeof(ZSTD_Sequence)); -+ return nbExternalSeqs + 1; -+ } -+} -+ -+/* ZSTD_fastSequenceLengthSum() : -+ * Returns sum(litLen) + sum(matchLen) + lastLits for *seqBuf*. -+ * Similar to another function in zstd_compress.c (determine_blockSize), -+ * except it doesn't check for a block delimiter to end summation. -+ * Removing the early exit allows the compiler to auto-vectorize (https://godbolt.org/z/cY1cajz9P). -+ * This function can be deleted and replaced by determine_blockSize after we resolve issue #3456. */ -+static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seqBufSize) { -+ size_t matchLenSum, litLenSum, i; -+ matchLenSum = 0; -+ litLenSum = 0; -+ for (i = 0; i < seqBufSize; i++) { -+ litLenSum += seqBuf[i].litLength; -+ matchLenSum += seqBuf[i].matchLength; -+ } -+ return litLenSum + matchLenSum; -+} -+ -+/* -+ * Function to validate sequences produced by a block compressor. -+ */ -+static void ZSTD_validateSeqStore(const SeqStore_t* seqStore, const ZSTD_compressionParameters* cParams) -+{ -+#if DEBUGLEVEL >= 1 -+ const SeqDef* seq = seqStore->sequencesStart; -+ const SeqDef* const seqEnd = seqStore->sequences; -+ size_t const matchLenLowerBound = cParams->minMatch == 3 ? 
3 : 4; -+ for (; seq < seqEnd; ++seq) { -+ const ZSTD_SequenceLength seqLength = ZSTD_getSequenceLength(seqStore, seq); -+ assert(seqLength.matchLength >= matchLenLowerBound); -+ (void)seqLength; -+ (void)matchLenLowerBound; -+ } -+#else -+ (void)seqStore; -+ (void)cParams; -+#endif -+} -+ -+static size_t -+ZSTD_transferSequences_wBlockDelim(ZSTD_CCtx* cctx, -+ ZSTD_SequencePosition* seqPos, -+ const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, -+ const void* src, size_t blockSize, -+ ZSTD_ParamSwitch_e externalRepSearch); -+ -+typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_BuildSeqStore_e; - - static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) - { -- ZSTD_matchState_t* const ms = &zc->blockState.matchState; -+ ZSTD_MatchState_t* const ms = &zc->blockState.matchState; - DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); - assert(srcSize <= ZSTD_BLOCKSIZE_MAX); - /* Assert that we have correctly flushed the ctx params into the ms's copy */ - ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); -- if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { -+ /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding -+ * additional 1. We need to revisit and change this logic to be more consistent */ -+ if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { - if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { - ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); - } else { -@@ -2763,6 +3216,15 @@ static size_t ZSTD_buildSeqStore(ZSTD_CC - } - if (zc->externSeqStore.pos < zc->externSeqStore.size) { - assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable); -+ -+ /* External matchfinder + LDM is technically possible, just not implemented yet. -+ * We need to revisit soon and implement it. */ -+ RETURN_ERROR_IF( -+ ZSTD_hasExtSeqProd(&zc->appliedParams), -+ parameter_combination_unsupported, -+ "Long-distance matching with external sequence producer enabled is not currently supported." -+ ); -+ - /* Updates ldmSeqStore.pos */ - lastLLSize = - ZSTD_ldm_blockCompress(&zc->externSeqStore, -@@ -2772,7 +3234,15 @@ static size_t ZSTD_buildSeqStore(ZSTD_CC - src, srcSize); - assert(zc->externSeqStore.pos <= zc->externSeqStore.size); - } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { -- rawSeqStore_t ldmSeqStore = kNullRawSeqStore; -+ RawSeqStore_t ldmSeqStore = kNullRawSeqStore; -+ -+ /* External matchfinder + LDM is technically possible, just not implemented yet. -+ * We need to revisit soon and implement it. */ -+ RETURN_ERROR_IF( -+ ZSTD_hasExtSeqProd(&zc->appliedParams), -+ parameter_combination_unsupported, -+ "Long-distance matching with external sequence producer enabled is not currently supported." 
-+ ); - - ldmSeqStore.seq = zc->ldmSequences; - ldmSeqStore.capacity = zc->maxNbLdmSequences; -@@ -2788,42 +3258,116 @@ static size_t ZSTD_buildSeqStore(ZSTD_CC - zc->appliedParams.useRowMatchFinder, - src, srcSize); - assert(ldmSeqStore.pos == ldmSeqStore.size); -- } else { /* not long range mode */ -- ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, -- zc->appliedParams.useRowMatchFinder, -- dictMode); -+ } else if (ZSTD_hasExtSeqProd(&zc->appliedParams)) { -+ assert( -+ zc->extSeqBufCapacity >= ZSTD_sequenceBound(srcSize) -+ ); -+ assert(zc->appliedParams.extSeqProdFunc != NULL); -+ -+ { U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog; -+ -+ size_t const nbExternalSeqs = (zc->appliedParams.extSeqProdFunc)( -+ zc->appliedParams.extSeqProdState, -+ zc->extSeqBuf, -+ zc->extSeqBufCapacity, -+ src, srcSize, -+ NULL, 0, /* dict and dictSize, currently not supported */ -+ zc->appliedParams.compressionLevel, -+ windowSize -+ ); -+ -+ size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult( -+ zc->extSeqBuf, -+ nbExternalSeqs, -+ zc->extSeqBufCapacity, -+ srcSize -+ ); -+ -+ /* Return early if there is no error, since we don't need to worry about last literals */ -+ if (!ZSTD_isError(nbPostProcessedSeqs)) { -+ ZSTD_SequencePosition seqPos = {0,0,0}; -+ size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->extSeqBuf, nbPostProcessedSeqs); -+ RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!"); -+ FORWARD_IF_ERROR( -+ ZSTD_transferSequences_wBlockDelim( -+ zc, &seqPos, -+ zc->extSeqBuf, nbPostProcessedSeqs, -+ src, srcSize, -+ zc->appliedParams.searchForExternalRepcodes -+ ), -+ "Failed to copy external sequences to seqStore!" -+ ); -+ ms->ldmSeqStore = NULL; -+ DEBUGLOG(5, "Copied %lu sequences from external sequence producer to internal seqStore.", (unsigned long)nbExternalSeqs); -+ return ZSTDbss_compress; -+ } -+ -+ /* Propagate the error if fallback is disabled */ -+ if (!zc->appliedParams.enableMatchFinderFallback) { -+ return nbPostProcessedSeqs; -+ } -+ -+ /* Fallback to software matchfinder */ -+ { ZSTD_BlockCompressor_f const blockCompressor = -+ ZSTD_selectBlockCompressor( -+ zc->appliedParams.cParams.strategy, -+ zc->appliedParams.useRowMatchFinder, -+ dictMode); -+ ms->ldmSeqStore = NULL; -+ DEBUGLOG( -+ 5, -+ "External sequence producer returned error code %lu. 
Falling back to internal parser.", -+ (unsigned long)nbExternalSeqs -+ ); -+ lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); -+ } } -+ } else { /* not long range mode and no external matchfinder */ -+ ZSTD_BlockCompressor_f const blockCompressor = ZSTD_selectBlockCompressor( -+ zc->appliedParams.cParams.strategy, -+ zc->appliedParams.useRowMatchFinder, -+ dictMode); - ms->ldmSeqStore = NULL; - lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); - } - { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; - ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); - } } -+ ZSTD_validateSeqStore(&zc->seqStore, &zc->appliedParams.cParams); - return ZSTDbss_compress; - } - --static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) -+static size_t ZSTD_copyBlockSequences(SeqCollector* seqCollector, const SeqStore_t* seqStore, const U32 prevRepcodes[ZSTD_REP_NUM]) - { -- const seqStore_t* seqStore = ZSTD_getSeqStore(zc); -- const seqDef* seqStoreSeqs = seqStore->sequencesStart; -- size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs; -- size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart); -- size_t literalsRead = 0; -- size_t lastLLSize; -- -- ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; -+ const SeqDef* inSeqs = seqStore->sequencesStart; -+ const size_t nbInSequences = (size_t)(seqStore->sequences - inSeqs); -+ const size_t nbInLiterals = (size_t)(seqStore->lit - seqStore->litStart); -+ -+ ZSTD_Sequence* outSeqs = seqCollector->seqIndex == 0 ? seqCollector->seqStart : seqCollector->seqStart + seqCollector->seqIndex; -+ const size_t nbOutSequences = nbInSequences + 1; -+ size_t nbOutLiterals = 0; -+ Repcodes_t repcodes; - size_t i; -- repcodes_t updatedRepcodes; - -- assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); -- /* Ensure we have enough space for last literals "sequence" */ -- assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1); -- ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); -- for (i = 0; i < seqStoreSeqSize; ++i) { -- U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM; -- outSeqs[i].litLength = seqStoreSeqs[i].litLength; -- outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH; -+ /* Bounds check that we have enough space for every input sequence -+ * and the block delimiter -+ */ -+ assert(seqCollector->seqIndex <= seqCollector->maxSequences); -+ RETURN_ERROR_IF( -+ nbOutSequences > (size_t)(seqCollector->maxSequences - seqCollector->seqIndex), -+ dstSize_tooSmall, -+ "Not enough space to copy sequences"); -+ -+ ZSTD_memcpy(&repcodes, prevRepcodes, sizeof(repcodes)); -+ for (i = 0; i < nbInSequences; ++i) { -+ U32 rawOffset; -+ outSeqs[i].litLength = inSeqs[i].litLength; -+ outSeqs[i].matchLength = inSeqs[i].mlBase + MINMATCH; - outSeqs[i].rep = 0; - -+ /* Handle the possible single length >= 64K -+ * There can only be one because we add MINMATCH to every match length, -+ * and blocks are at most 128K. 
-+ */ - if (i == seqStore->longLengthPos) { - if (seqStore->longLengthType == ZSTD_llt_literalLength) { - outSeqs[i].litLength += 0x10000; -@@ -2832,46 +3376,75 @@ static void ZSTD_copyBlockSequences(ZSTD - } - } - -- if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) { -- /* Derive the correct offset corresponding to a repcode */ -- outSeqs[i].rep = seqStoreSeqs[i].offBase; -+ /* Determine the raw offset given the offBase, which may be a repcode. */ -+ if (OFFBASE_IS_REPCODE(inSeqs[i].offBase)) { -+ const U32 repcode = OFFBASE_TO_REPCODE(inSeqs[i].offBase); -+ assert(repcode > 0); -+ outSeqs[i].rep = repcode; - if (outSeqs[i].litLength != 0) { -- rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1]; -+ rawOffset = repcodes.rep[repcode - 1]; - } else { -- if (outSeqs[i].rep == 3) { -- rawOffset = updatedRepcodes.rep[0] - 1; -+ if (repcode == 3) { -+ assert(repcodes.rep[0] > 1); -+ rawOffset = repcodes.rep[0] - 1; - } else { -- rawOffset = updatedRepcodes.rep[outSeqs[i].rep]; -+ rawOffset = repcodes.rep[repcode]; - } - } -+ } else { -+ rawOffset = OFFBASE_TO_OFFSET(inSeqs[i].offBase); - } - outSeqs[i].offset = rawOffset; -- /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode -- so we provide seqStoreSeqs[i].offset - 1 */ -- ZSTD_updateRep(updatedRepcodes.rep, -- seqStoreSeqs[i].offBase - 1, -- seqStoreSeqs[i].litLength == 0); -- literalsRead += outSeqs[i].litLength; -+ -+ /* Update repcode history for the sequence */ -+ ZSTD_updateRep(repcodes.rep, -+ inSeqs[i].offBase, -+ inSeqs[i].litLength == 0); -+ -+ nbOutLiterals += outSeqs[i].litLength; - } - /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0. - * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker - * for the block boundary, according to the API. - */ -- assert(seqStoreLiteralsSize >= literalsRead); -- lastLLSize = seqStoreLiteralsSize - literalsRead; -- outSeqs[i].litLength = (U32)lastLLSize; -- outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0; -- seqStoreSeqSize++; -- zc->seqCollector.seqIndex += seqStoreSeqSize; -+ assert(nbInLiterals >= nbOutLiterals); -+ { -+ const size_t lastLLSize = nbInLiterals - nbOutLiterals; -+ outSeqs[nbInSequences].litLength = (U32)lastLLSize; -+ outSeqs[nbInSequences].matchLength = 0; -+ outSeqs[nbInSequences].offset = 0; -+ assert(nbOutSequences == nbInSequences + 1); -+ } -+ seqCollector->seqIndex += nbOutSequences; -+ assert(seqCollector->seqIndex <= seqCollector->maxSequences); -+ -+ return 0; -+} -+ -+size_t ZSTD_sequenceBound(size_t srcSize) { -+ const size_t maxNbSeq = (srcSize / ZSTD_MINMATCH_MIN) + 1; -+ const size_t maxNbDelims = (srcSize / ZSTD_BLOCKSIZE_MAX_MIN) + 1; -+ return maxNbSeq + maxNbDelims; - } - - size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, - size_t outSeqsSize, const void* src, size_t srcSize) - { - const size_t dstCapacity = ZSTD_compressBound(srcSize); -- void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem); -+ void* dst; /* Make C90 happy. 
*/ - SeqCollector seqCollector; -+ { -+ int targetCBlockSize; -+ FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_targetCBlockSize, &targetCBlockSize), ""); -+ RETURN_ERROR_IF(targetCBlockSize != 0, parameter_unsupported, "targetCBlockSize != 0"); -+ } -+ { -+ int nbWorkers; -+ FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_nbWorkers, &nbWorkers), ""); -+ RETURN_ERROR_IF(nbWorkers != 0, parameter_unsupported, "nbWorkers != 0"); -+ } - -+ dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem); - RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!"); - - seqCollector.collectSequences = 1; -@@ -2880,8 +3453,12 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* - seqCollector.maxSequences = outSeqsSize; - zc->seqCollector = seqCollector; - -- ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); -- ZSTD_customFree(dst, ZSTD_defaultCMem); -+ { -+ const size_t ret = ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); -+ ZSTD_customFree(dst, ZSTD_defaultCMem); -+ FORWARD_IF_ERROR(ret, "ZSTD_compress2 failed"); -+ } -+ assert(zc->seqCollector.seqIndex <= ZSTD_sequenceBound(srcSize)); - return zc->seqCollector.seqIndex; - } - -@@ -2910,19 +3487,17 @@ static int ZSTD_isRLE(const BYTE* src, s - const size_t unrollMask = unrollSize - 1; - const size_t prefixLength = length & unrollMask; - size_t i; -- size_t u; - if (length == 1) return 1; - /* Check if prefix is RLE first before using unrolled loop */ - if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) { - return 0; - } - for (i = prefixLength; i != length; i += unrollSize) { -+ size_t u; - for (u = 0; u < unrollSize; u += sizeof(size_t)) { - if (MEM_readST(ip + i + u) != valueST) { - return 0; -- } -- } -- } -+ } } } - return 1; - } - -@@ -2930,7 +3505,7 @@ static int ZSTD_isRLE(const BYTE* src, s - * This is just a heuristic based on the compressibility. - * It may return both false positives and false negatives. - */ --static int ZSTD_maybeRLE(seqStore_t const* seqStore) -+static int ZSTD_maybeRLE(SeqStore_t const* seqStore) - { - size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); - size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); -@@ -2938,7 +3513,8 @@ static int ZSTD_maybeRLE(seqStore_t cons - return nbSeqs < 4 && nbLits < 10; - } - --static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs) -+static void -+ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs) - { - ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock; - bs->prevCBlock = bs->nextCBlock; -@@ -2946,12 +3522,14 @@ static void ZSTD_blockState_confirmRepco - } - - /* Writes the block header */ --static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) { -+static void -+writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) -+{ - U32 const cBlockHeader = cSize == 1 ? - lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : - lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - MEM_writeLE24(op, cBlockHeader); -- DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock); -+ DEBUGLOG(5, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock); - } - - /* ZSTD_buildBlockEntropyStats_literals() : -@@ -2959,13 +3537,16 @@ static void writeBlockHeader(void* op, s - * Stores literals block type (raw, rle, compressed, repeat) and - * huffman description table to hufMetadata. 
- * Requires ENTROPY_WORKSPACE_SIZE workspace -- * @return : size of huffman description table or error code */ --static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, -- const ZSTD_hufCTables_t* prevHuf, -- ZSTD_hufCTables_t* nextHuf, -- ZSTD_hufCTablesMetadata_t* hufMetadata, -- const int literalsCompressionIsDisabled, -- void* workspace, size_t wkspSize) -+ * @return : size of huffman description table, or an error code -+ */ -+static size_t -+ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, -+ const ZSTD_hufCTables_t* prevHuf, -+ ZSTD_hufCTables_t* nextHuf, -+ ZSTD_hufCTablesMetadata_t* hufMetadata, -+ const int literalsCompressionIsDisabled, -+ void* workspace, size_t wkspSize, -+ int hufFlags) - { - BYTE* const wkspStart = (BYTE*)workspace; - BYTE* const wkspEnd = wkspStart + wkspSize; -@@ -2973,9 +3554,9 @@ static size_t ZSTD_buildBlockEntropyStat - unsigned* const countWksp = (unsigned*)workspace; - const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); - BYTE* const nodeWksp = countWkspStart + countWkspSize; -- const size_t nodeWkspSize = wkspEnd-nodeWksp; -+ const size_t nodeWkspSize = (size_t)(wkspEnd - nodeWksp); - unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; -- unsigned huffLog = HUF_TABLELOG_DEFAULT; -+ unsigned huffLog = LitHufLog; - HUF_repeat repeat = prevHuf->repeatMode; - DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize); - -@@ -2990,73 +3571,77 @@ static size_t ZSTD_buildBlockEntropyStat - - /* small ? don't even attempt compression (speed opt) */ - #ifndef COMPRESS_LITERALS_SIZE_MIN --#define COMPRESS_LITERALS_SIZE_MIN 63 -+# define COMPRESS_LITERALS_SIZE_MIN 63 /* heuristic */ - #endif - { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 
6 : COMPRESS_LITERALS_SIZE_MIN; - if (srcSize <= minLitSize) { - DEBUGLOG(5, "set_basic - too small"); - hufMetadata->hType = set_basic; - return 0; -- } -- } -+ } } - - /* Scan input and build symbol stats */ -- { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); -+ { size_t const largest = -+ HIST_count_wksp (countWksp, &maxSymbolValue, -+ (const BYTE*)src, srcSize, -+ workspace, wkspSize); - FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); - if (largest == srcSize) { -+ /* only one literal symbol */ - DEBUGLOG(5, "set_rle"); - hufMetadata->hType = set_rle; - return 0; - } - if (largest <= (srcSize >> 7)+4) { -+ /* heuristic: likely not compressible */ - DEBUGLOG(5, "set_basic - no gain"); - hufMetadata->hType = set_basic; - return 0; -- } -- } -+ } } - - /* Validate the previous Huffman table */ -- if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { -+ if (repeat == HUF_repeat_check -+ && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { - repeat = HUF_repeat_none; - } - - /* Build Huffman Tree */ - ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); -- huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); -+ huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, nodeWksp, nodeWkspSize, nextHuf->CTable, countWksp, hufFlags); -+ assert(huffLog <= LitHufLog); - { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, - maxSymbolValue, huffLog, - nodeWksp, nodeWkspSize); - FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); - huffLog = (U32)maxBits; -- { /* Build and write the CTable */ -- size_t const newCSize = HUF_estimateCompressedSize( -- (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); -- size_t const hSize = HUF_writeCTable_wksp( -- hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), -- (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, -- nodeWksp, nodeWkspSize); -- /* Check against repeating the previous CTable */ -- if (repeat != HUF_repeat_none) { -- size_t const oldCSize = HUF_estimateCompressedSize( -- (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); -- if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { -- DEBUGLOG(5, "set_repeat - smaller"); -- ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); -- hufMetadata->hType = set_repeat; -- return 0; -- } -- } -- if (newCSize + hSize >= srcSize) { -- DEBUGLOG(5, "set_basic - no gains"); -+ } -+ { /* Build and write the CTable */ -+ size_t const newCSize = HUF_estimateCompressedSize( -+ (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); -+ size_t const hSize = HUF_writeCTable_wksp( -+ hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), -+ (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, -+ nodeWksp, nodeWkspSize); -+ /* Check against repeating the previous CTable */ -+ if (repeat != HUF_repeat_none) { -+ size_t const oldCSize = HUF_estimateCompressedSize( -+ (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); -+ if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { -+ DEBUGLOG(5, "set_repeat - smaller"); - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); -- hufMetadata->hType = set_basic; -+ hufMetadata->hType = set_repeat; - return 0; -- } -- DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); -- hufMetadata->hType = set_compressed; -- nextHuf->repeatMode = HUF_repeat_check; -- return 
hSize; -+ } } -+ if (newCSize + hSize >= srcSize) { -+ DEBUGLOG(5, "set_basic - no gains"); -+ ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); -+ hufMetadata->hType = set_basic; -+ return 0; - } -+ DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); -+ hufMetadata->hType = set_compressed; -+ nextHuf->repeatMode = HUF_repeat_check; -+ return hSize; - } - } - -@@ -3066,8 +3651,9 @@ static size_t ZSTD_buildBlockEntropyStat - * and updates nextEntropy to the appropriate repeatMode. - */ - static ZSTD_symbolEncodingTypeStats_t --ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) { -- ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0}; -+ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) -+{ -+ ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0, 0}; - nextEntropy->litlength_repeatMode = FSE_repeat_none; - nextEntropy->offcode_repeatMode = FSE_repeat_none; - nextEntropy->matchlength_repeatMode = FSE_repeat_none; -@@ -3078,16 +3664,18 @@ ZSTD_buildDummySequencesStatistics(ZSTD_ - * Builds entropy for the sequences. - * Stores symbol compression modes and fse table to fseMetadata. - * Requires ENTROPY_WORKSPACE_SIZE wksp. -- * @return : size of fse tables or error code */ --static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, -- const ZSTD_fseCTables_t* prevEntropy, -- ZSTD_fseCTables_t* nextEntropy, -- const ZSTD_CCtx_params* cctxParams, -- ZSTD_fseCTablesMetadata_t* fseMetadata, -- void* workspace, size_t wkspSize) -+ * @return : size of fse tables or error code */ -+static size_t -+ZSTD_buildBlockEntropyStats_sequences( -+ const SeqStore_t* seqStorePtr, -+ const ZSTD_fseCTables_t* prevEntropy, -+ ZSTD_fseCTables_t* nextEntropy, -+ const ZSTD_CCtx_params* cctxParams, -+ ZSTD_fseCTablesMetadata_t* fseMetadata, -+ void* workspace, size_t wkspSize) - { - ZSTD_strategy const strategy = cctxParams->cParams.strategy; -- size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; -+ size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - BYTE* const ostart = fseMetadata->fseTablesBuffer; - BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); - BYTE* op = ostart; -@@ -3103,9 +3691,9 @@ static size_t ZSTD_buildBlockEntropyStat - entropyWorkspace, entropyWorkspaceSize) - : ZSTD_buildDummySequencesStatistics(nextEntropy); - FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!"); -- fseMetadata->llType = (symbolEncodingType_e) stats.LLtype; -- fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype; -- fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype; -+ fseMetadata->llType = (SymbolEncodingType_e) stats.LLtype; -+ fseMetadata->ofType = (SymbolEncodingType_e) stats.Offtype; -+ fseMetadata->mlType = (SymbolEncodingType_e) stats.MLtype; - fseMetadata->lastCountSize = stats.lastCountSize; - return stats.size; - } -@@ -3114,23 +3702,28 @@ static size_t ZSTD_buildBlockEntropyStat - /* ZSTD_buildBlockEntropyStats() : - * Builds entropy for the block. 
- * Requires workspace size ENTROPY_WORKSPACE_SIZE -- * -- * @return : 0 on success or error code -+ * @return : 0 on success, or an error code -+ * Note : also employed in superblock - */ --size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, -- const ZSTD_entropyCTables_t* prevEntropy, -- ZSTD_entropyCTables_t* nextEntropy, -- const ZSTD_CCtx_params* cctxParams, -- ZSTD_entropyCTablesMetadata_t* entropyMetadata, -- void* workspace, size_t wkspSize) --{ -- size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; -+size_t ZSTD_buildBlockEntropyStats( -+ const SeqStore_t* seqStorePtr, -+ const ZSTD_entropyCTables_t* prevEntropy, -+ ZSTD_entropyCTables_t* nextEntropy, -+ const ZSTD_CCtx_params* cctxParams, -+ ZSTD_entropyCTablesMetadata_t* entropyMetadata, -+ void* workspace, size_t wkspSize) -+{ -+ size_t const litSize = (size_t)(seqStorePtr->lit - seqStorePtr->litStart); -+ int const huf_useOptDepth = (cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD); -+ int const hufFlags = huf_useOptDepth ? HUF_flags_optimalDepth : 0; -+ - entropyMetadata->hufMetadata.hufDesSize = - ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize, - &prevEntropy->huf, &nextEntropy->huf, - &entropyMetadata->hufMetadata, - ZSTD_literalsCompressionIsDisabled(cctxParams), -- workspace, wkspSize); -+ workspace, wkspSize, hufFlags); -+ - FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed"); - entropyMetadata->fseMetadata.fseTablesSize = - ZSTD_buildBlockEntropyStats_sequences(seqStorePtr, -@@ -3143,11 +3736,12 @@ size_t ZSTD_buildBlockEntropyStats(seqSt - } - - /* Returns the size estimate for the literals section (header + content) of a block */ --static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize, -- const ZSTD_hufCTables_t* huf, -- const ZSTD_hufCTablesMetadata_t* hufMetadata, -- void* workspace, size_t wkspSize, -- int writeEntropy) -+static size_t -+ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize, -+ const ZSTD_hufCTables_t* huf, -+ const ZSTD_hufCTablesMetadata_t* hufMetadata, -+ void* workspace, size_t wkspSize, -+ int writeEntropy) - { - unsigned* const countWksp = (unsigned*)workspace; - unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; -@@ -3169,12 +3763,13 @@ static size_t ZSTD_estimateBlockSize_lit - } - - /* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */ --static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type, -- const BYTE* codeTable, size_t nbSeq, unsigned maxCode, -- const FSE_CTable* fseCTable, -- const U8* additionalBits, -- short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, -- void* workspace, size_t wkspSize) -+static size_t -+ZSTD_estimateBlockSize_symbolType(SymbolEncodingType_e type, -+ const BYTE* codeTable, size_t nbSeq, unsigned maxCode, -+ const FSE_CTable* fseCTable, -+ const U8* additionalBits, -+ short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, -+ void* workspace, size_t wkspSize) - { - unsigned* const countWksp = (unsigned*)workspace; - const BYTE* ctp = codeTable; -@@ -3206,116 +3801,121 @@ static size_t ZSTD_estimateBlockSize_sym - } - - /* Returns the size estimate for the sequences section (header + content) of a block */ --static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable, -- const BYTE* llCodeTable, -- const BYTE* mlCodeTable, -- size_t nbSeq, -- const ZSTD_fseCTables_t* fseTables, -- const ZSTD_fseCTablesMetadata_t* fseMetadata, -- void* 
workspace, size_t wkspSize, -- int writeEntropy) -+static size_t -+ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable, -+ const BYTE* llCodeTable, -+ const BYTE* mlCodeTable, -+ size_t nbSeq, -+ const ZSTD_fseCTables_t* fseTables, -+ const ZSTD_fseCTablesMetadata_t* fseMetadata, -+ void* workspace, size_t wkspSize, -+ int writeEntropy) - { - size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ); - size_t cSeqSizeEstimate = 0; - cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff, -- fseTables->offcodeCTable, NULL, -- OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, -- workspace, wkspSize); -+ fseTables->offcodeCTable, NULL, -+ OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, -+ workspace, wkspSize); - cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL, -- fseTables->litlengthCTable, LL_bits, -- LL_defaultNorm, LL_defaultNormLog, MaxLL, -- workspace, wkspSize); -+ fseTables->litlengthCTable, LL_bits, -+ LL_defaultNorm, LL_defaultNormLog, MaxLL, -+ workspace, wkspSize); - cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML, -- fseTables->matchlengthCTable, ML_bits, -- ML_defaultNorm, ML_defaultNormLog, MaxML, -- workspace, wkspSize); -+ fseTables->matchlengthCTable, ML_bits, -+ ML_defaultNorm, ML_defaultNormLog, MaxML, -+ workspace, wkspSize); - if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; - return cSeqSizeEstimate + sequencesSectionHeaderSize; - } - - /* Returns the size estimate for a given stream of literals, of, ll, ml */ --static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize, -- const BYTE* ofCodeTable, -- const BYTE* llCodeTable, -- const BYTE* mlCodeTable, -- size_t nbSeq, -- const ZSTD_entropyCTables_t* entropy, -- const ZSTD_entropyCTablesMetadata_t* entropyMetadata, -- void* workspace, size_t wkspSize, -- int writeLitEntropy, int writeSeqEntropy) { -+static size_t -+ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize, -+ const BYTE* ofCodeTable, -+ const BYTE* llCodeTable, -+ const BYTE* mlCodeTable, -+ size_t nbSeq, -+ const ZSTD_entropyCTables_t* entropy, -+ const ZSTD_entropyCTablesMetadata_t* entropyMetadata, -+ void* workspace, size_t wkspSize, -+ int writeLitEntropy, int writeSeqEntropy) -+{ - size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize, -- &entropy->huf, &entropyMetadata->hufMetadata, -- workspace, wkspSize, writeLitEntropy); -+ &entropy->huf, &entropyMetadata->hufMetadata, -+ workspace, wkspSize, writeLitEntropy); - size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, -- nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, -- workspace, wkspSize, writeSeqEntropy); -+ nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, -+ workspace, wkspSize, writeSeqEntropy); - return seqSize + literalsSize + ZSTD_blockHeaderSize; - } - - /* Builds entropy statistics and uses them for blocksize estimation. - * -- * Returns the estimated compressed size of the seqStore, or a zstd error. -+ * @return: estimated compressed size of the seqStore, or a zstd error. 
- */ --static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) { -- ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata; -+static size_t -+ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(SeqStore_t* seqStore, ZSTD_CCtx* zc) -+{ -+ ZSTD_entropyCTablesMetadata_t* const entropyMetadata = &zc->blockSplitCtx.entropyMetadata; - DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()"); - FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore, - &zc->blockState.prevCBlock->entropy, - &zc->blockState.nextCBlock->entropy, - &zc->appliedParams, - entropyMetadata, -- zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); -- return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart), -+ zc->tmpWorkspace, zc->tmpWkspSize), ""); -+ return ZSTD_estimateBlockSize( -+ seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart), - seqStore->ofCode, seqStore->llCode, seqStore->mlCode, - (size_t)(seqStore->sequences - seqStore->sequencesStart), -- &zc->blockState.nextCBlock->entropy, entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, -+ &zc->blockState.nextCBlock->entropy, -+ entropyMetadata, -+ zc->tmpWorkspace, zc->tmpWkspSize, - (int)(entropyMetadata->hufMetadata.hType == set_compressed), 1); - } - - /* Returns literals bytes represented in a seqStore */ --static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) { -+static size_t ZSTD_countSeqStoreLiteralsBytes(const SeqStore_t* const seqStore) -+{ - size_t literalsBytes = 0; -- size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; -+ size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); - size_t i; - for (i = 0; i < nbSeqs; ++i) { -- seqDef seq = seqStore->sequencesStart[i]; -+ SeqDef const seq = seqStore->sequencesStart[i]; - literalsBytes += seq.litLength; - if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) { - literalsBytes += 0x10000; -- } -- } -+ } } - return literalsBytes; - } - - /* Returns match bytes represented in a seqStore */ --static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) { -+static size_t ZSTD_countSeqStoreMatchBytes(const SeqStore_t* const seqStore) -+{ - size_t matchBytes = 0; -- size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; -+ size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); - size_t i; - for (i = 0; i < nbSeqs; ++i) { -- seqDef seq = seqStore->sequencesStart[i]; -+ SeqDef seq = seqStore->sequencesStart[i]; - matchBytes += seq.mlBase + MINMATCH; - if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) { - matchBytes += 0x10000; -- } -- } -+ } } - return matchBytes; - } - - /* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx). - * Stores the result in resultSeqStore. 
- */ --static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, -- const seqStore_t* originalSeqStore, -- size_t startIdx, size_t endIdx) { -- BYTE* const litEnd = originalSeqStore->lit; -- size_t literalsBytes; -- size_t literalsBytesPreceding = 0; -- -+static void ZSTD_deriveSeqStoreChunk(SeqStore_t* resultSeqStore, -+ const SeqStore_t* originalSeqStore, -+ size_t startIdx, size_t endIdx) -+{ - *resultSeqStore = *originalSeqStore; - if (startIdx > 0) { - resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx; -- literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); -+ resultSeqStore->litStart += ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); - } - - /* Move longLengthPos into the correct position if necessary */ -@@ -3328,13 +3928,12 @@ static void ZSTD_deriveSeqStoreChunk(seq - } - resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx; - resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx; -- literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); -- resultSeqStore->litStart += literalsBytesPreceding; - if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) { - /* This accounts for possible last literals if the derived chunk reaches the end of the block */ -- resultSeqStore->lit = litEnd; -+ assert(resultSeqStore->lit == originalSeqStore->lit); - } else { -- resultSeqStore->lit = resultSeqStore->litStart+literalsBytes; -+ size_t const literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); -+ resultSeqStore->lit = resultSeqStore->litStart + literalsBytes; - } - resultSeqStore->llCode += startIdx; - resultSeqStore->mlCode += startIdx; -@@ -3342,20 +3941,26 @@ static void ZSTD_deriveSeqStoreChunk(seq - } - - /* -- * Returns the raw offset represented by the combination of offCode, ll0, and repcode history. -- * offCode must represent a repcode in the numeric representation of ZSTD_storeSeq(). -+ * Returns the raw offset represented by the combination of offBase, ll0, and repcode history. -+ * offBase must represent a repcode in the numeric representation of ZSTD_storeSeq(). - */ - static U32 --ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0) -+ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offBase, const U32 ll0) - { -- U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0; /* [ 0 - 3 ] */ -- assert(STORED_IS_REPCODE(offCode)); -- if (adjustedOffCode == ZSTD_REP_NUM) { -- /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */ -- assert(rep[0] > 0); -+ U32 const adjustedRepCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0; /* [ 0 - 3 ] */ -+ assert(OFFBASE_IS_REPCODE(offBase)); -+ if (adjustedRepCode == ZSTD_REP_NUM) { -+ assert(ll0); -+ /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 -+ * This is only valid if it results in a valid offset value, aka > 0. -+ * Note : it may happen that `rep[0]==1` in exceptional circumstances. -+ * In which case this function will return 0, which is an invalid offset. -+ * It's not an issue though, since this value will be -+ * compared and discarded within ZSTD_seqStore_resolveOffCodes(). 
-+ */ - return rep[0] - 1; - } -- return rep[adjustedOffCode]; -+ return rep[adjustedRepCode]; - } - - /* -@@ -3371,30 +3976,33 @@ ZSTD_resolveRepcodeToRawOffset(const U32 - * 1-3 : repcode 1-3 - * 4+ : real_offset+3 - */ --static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes, -- seqStore_t* const seqStore, U32 const nbSeq) { -+static void -+ZSTD_seqStore_resolveOffCodes(Repcodes_t* const dRepcodes, Repcodes_t* const cRepcodes, -+ const SeqStore_t* const seqStore, U32 const nbSeq) -+{ - U32 idx = 0; -+ U32 const longLitLenIdx = seqStore->longLengthType == ZSTD_llt_literalLength ? seqStore->longLengthPos : nbSeq; - for (; idx < nbSeq; ++idx) { -- seqDef* const seq = seqStore->sequencesStart + idx; -- U32 const ll0 = (seq->litLength == 0); -- U32 const offCode = OFFBASE_TO_STORED(seq->offBase); -- assert(seq->offBase > 0); -- if (STORED_IS_REPCODE(offCode)) { -- U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0); -- U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0); -+ SeqDef* const seq = seqStore->sequencesStart + idx; -+ U32 const ll0 = (seq->litLength == 0) && (idx != longLitLenIdx); -+ U32 const offBase = seq->offBase; -+ assert(offBase > 0); -+ if (OFFBASE_IS_REPCODE(offBase)) { -+ U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offBase, ll0); -+ U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offBase, ll0); - /* Adjust simulated decompression repcode history if we come across a mismatch. Replace - * the repcode with the offset it actually references, determined by the compression - * repcode history. - */ - if (dRawOffset != cRawOffset) { -- seq->offBase = cRawOffset + ZSTD_REP_NUM; -+ seq->offBase = OFFSET_TO_OFFBASE(cRawOffset); - } - } - /* Compression repcode history is always updated with values directly from the unmodified seqStore. - * Decompression repcode history may use modified seq->offset value taken from compression repcode history. - */ -- ZSTD_updateRep(dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0); -- ZSTD_updateRep(cRepcodes->rep, offCode, ll0); -+ ZSTD_updateRep(dRepcodes->rep, seq->offBase, ll0); -+ ZSTD_updateRep(cRepcodes->rep, offBase, ll0); - } - } - -@@ -3404,10 +4012,11 @@ static void ZSTD_seqStore_resolveOffCode - * Returns the total size of that block (including header) or a ZSTD error code. 
- */ - static size_t --ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore, -- repcodes_t* const dRep, repcodes_t* const cRep, -+ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, -+ const SeqStore_t* const seqStore, -+ Repcodes_t* const dRep, Repcodes_t* const cRep, - void* dst, size_t dstCapacity, -- const void* src, size_t srcSize, -+ const void* src, size_t srcSize, - U32 lastBlock, U32 isPartition) - { - const U32 rleMaxLength = 25; -@@ -3417,7 +4026,7 @@ ZSTD_compressSeqStore_singleBlock(ZSTD_C - size_t cSeqsSize; - - /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */ -- repcodes_t const dRepOriginal = *dRep; -+ Repcodes_t const dRepOriginal = *dRep; - DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock"); - if (isPartition) - ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart)); -@@ -3428,7 +4037,7 @@ ZSTD_compressSeqStore_singleBlock(ZSTD_C - &zc->appliedParams, - op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, - srcSize, -- zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, -+ zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */, - zc->bmi2); - FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!"); - -@@ -3442,8 +4051,9 @@ ZSTD_compressSeqStore_singleBlock(ZSTD_C - cSeqsSize = 1; - } - -+ /* Sequence collection not supported when block splitting */ - if (zc->seqCollector.collectSequences) { -- ZSTD_copyBlockSequences(zc); -+ FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, seqStore, dRepOriginal.rep), "copyBlockSequences failed"); - ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); - return 0; - } -@@ -3451,18 +4061,18 @@ ZSTD_compressSeqStore_singleBlock(ZSTD_C - if (cSeqsSize == 0) { - cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); - FORWARD_IF_ERROR(cSize, "Nocompress block failed"); -- DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize); -+ DEBUGLOG(5, "Writing out nocompress block, size: %zu", cSize); - *dRep = dRepOriginal; /* reset simulated decompression repcode history */ - } else if (cSeqsSize == 1) { - cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock); - FORWARD_IF_ERROR(cSize, "RLE compress block failed"); -- DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize); -+ DEBUGLOG(5, "Writing out RLE block, size: %zu", cSize); - *dRep = dRepOriginal; /* reset simulated decompression repcode history */ - } else { - ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); - writeBlockHeader(op, cSeqsSize, srcSize, lastBlock); - cSize = ZSTD_blockHeaderSize + cSeqsSize; -- DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize); -+ DEBUGLOG(5, "Writing out compressed block, size: %zu", cSize); - } - - if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) -@@ -3481,45 +4091,49 @@ typedef struct { - - /* Helper function to perform the recursive search for block splits. - * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half. -- * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then -- * we do not recurse. -+ * If advantageous to split, then we recurse down the two sub-blocks. -+ * If not, or if an error occurred in estimation, then we do not recurse. 
- * -- * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING. -+ * Note: The recursion depth is capped by a heuristic minimum number of sequences, -+ * defined by MIN_SEQUENCES_BLOCK_SPLITTING. - * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING). - * In practice, recursion depth usually doesn't go beyond 4. - * -- * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize -+ * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. -+ * At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize - * maximum of 128 KB, this value is actually impossible to reach. - */ - static void - ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx, -- ZSTD_CCtx* zc, const seqStore_t* origSeqStore) -+ ZSTD_CCtx* zc, const SeqStore_t* origSeqStore) - { -- seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk; -- seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore; -- seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore; -+ SeqStore_t* const fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk; -+ SeqStore_t* const firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore; -+ SeqStore_t* const secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore; - size_t estimatedOriginalSize; - size_t estimatedFirstHalfSize; - size_t estimatedSecondHalfSize; - size_t midIdx = (startIdx + endIdx)/2; - -+ DEBUGLOG(5, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx); -+ assert(endIdx >= startIdx); - if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) { -- DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences"); -+ DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences (%zu)", endIdx - startIdx); - return; - } -- DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx); - ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx); - ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx); - ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx); - estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc); - estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc); - estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc); -- DEBUGLOG(4, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu", -+ DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu", - estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize); - if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) { - return; - } - if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) { -+ DEBUGLOG(5, "split decided at seqNb:%zu", midIdx); - ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore); - splits->splitLocations[splits->idx] = (U32)midIdx; - splits->idx++; -@@ -3527,14 +4141,18 @@ ZSTD_deriveBlockSplitsHelper(seqStoreSpl - } - } - --/* Base recursive function. 
Populates a table with intra-block partition indices that can improve compression ratio. -+/* Base recursive function. -+ * Populates a table with intra-block partition indices that can improve compression ratio. - * -- * Returns the number of splits made (which equals the size of the partition table - 1). -+ * @return: number of splits made (which equals the size of the partition table - 1). - */ --static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) { -- seqStoreSplits splits = {partitions, 0}; -+static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) -+{ -+ seqStoreSplits splits; -+ splits.splitLocations = partitions; -+ splits.idx = 0; - if (nbSeq <= 4) { -- DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split"); -+ DEBUGLOG(5, "ZSTD_deriveBlockSplits: Too few sequences to split (%u <= 4)", nbSeq); - /* Refuse to try and split anything with less than 4 sequences */ - return 0; - } -@@ -3550,18 +4168,20 @@ static size_t ZSTD_deriveBlockSplits(ZST - * Returns combined size of all blocks (which includes headers), or a ZSTD error code. - */ - static size_t --ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, -- const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) -+ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, -+ void* dst, size_t dstCapacity, -+ const void* src, size_t blockSize, -+ U32 lastBlock, U32 nbSeq) - { - size_t cSize = 0; - const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)dst; - size_t i = 0; - size_t srcBytesTotal = 0; -- U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */ -- seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore; -- seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore; -- size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq); -+ U32* const partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */ -+ SeqStore_t* const nextSeqStore = &zc->blockSplitCtx.nextSeqStore; -+ SeqStore_t* const currSeqStore = &zc->blockSplitCtx.currSeqStore; -+ size_t const numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq); - - /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history - * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two -@@ -3577,36 +4197,37 @@ ZSTD_compressBlock_splitBlock_internal(Z - * - * See ZSTD_seqStore_resolveOffCodes() for more details. 
- */ -- repcodes_t dRep; -- repcodes_t cRep; -- ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); -- ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); -- ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t)); -+ Repcodes_t dRep; -+ Repcodes_t cRep; -+ ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(Repcodes_t)); -+ ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(Repcodes_t)); -+ ZSTD_memset(nextSeqStore, 0, sizeof(SeqStore_t)); - -- DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", -+ DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", - (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, - (unsigned)zc->blockState.matchState.nextToUpdate); - - if (numSplits == 0) { -- size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore, -- &dRep, &cRep, -- op, dstCapacity, -- ip, blockSize, -- lastBlock, 0 /* isPartition */); -+ size_t cSizeSingleBlock = -+ ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore, -+ &dRep, &cRep, -+ op, dstCapacity, -+ ip, blockSize, -+ lastBlock, 0 /* isPartition */); - FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!"); - DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits"); -- assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); -+ assert(zc->blockSizeMax <= ZSTD_BLOCKSIZE_MAX); -+ assert(cSizeSingleBlock <= zc->blockSizeMax + ZSTD_blockHeaderSize); - return cSizeSingleBlock; - } - - ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]); - for (i = 0; i <= numSplits; ++i) { -- size_t srcBytes; - size_t cSizeChunk; - U32 const lastPartition = (i == numSplits); - U32 lastBlockEntireSrc = 0; - -- srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore); -+ size_t srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore); - srcBytesTotal += srcBytes; - if (lastPartition) { - /* This is the final partition, need to account for possible last literals */ -@@ -3621,7 +4242,8 @@ ZSTD_compressBlock_splitBlock_internal(Z - op, dstCapacity, - ip, srcBytes, - lastBlockEntireSrc, 1 /* isPartition */); -- DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk); -+ DEBUGLOG(5, "Estimated size: %zu vs %zu : actual size", -+ ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk); - FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!"); - - ip += srcBytes; -@@ -3629,12 +4251,12 @@ ZSTD_compressBlock_splitBlock_internal(Z - dstCapacity -= cSizeChunk; - cSize += cSizeChunk; - *currSeqStore = *nextSeqStore; -- assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); -+ assert(cSizeChunk <= zc->blockSizeMax + ZSTD_blockHeaderSize); - } -- /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes -- * for the next block. -+ /* cRep and dRep may have diverged during the compression. -+ * If so, we use the dRep repcodes for the next block. 
- */ -- ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t)); -+ ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(Repcodes_t)); - return cSize; - } - -@@ -3643,21 +4265,20 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, U32 lastBlock) - { -- const BYTE* ip = (const BYTE*)src; -- BYTE* op = (BYTE*)dst; - U32 nbSeq; - size_t cSize; -- DEBUGLOG(4, "ZSTD_compressBlock_splitBlock"); -- assert(zc->appliedParams.useBlockSplitter == ZSTD_ps_enable); -+ DEBUGLOG(5, "ZSTD_compressBlock_splitBlock"); -+ assert(zc->appliedParams.postBlockSplitter == ZSTD_ps_enable); - - { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); - FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); - if (bss == ZSTDbss_noCompress) { - if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) - zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; -- cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); -+ RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block"); -+ cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); - FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); -- DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block"); -+ DEBUGLOG(5, "ZSTD_compressBlock_splitBlock: Nocompress block"); - return cSize; - } - nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart); -@@ -3673,9 +4294,9 @@ ZSTD_compressBlock_internal(ZSTD_CCtx* z - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, U32 frame) - { -- /* This the upper bound for the length of an rle block. -- * This isn't the actual upper bound. Finding the real threshold -- * needs further investigation. -+ /* This is an estimated upper bound for the length of an rle block. -+ * This isn't the actual upper bound. -+ * Finding the real threshold needs further investigation. - */ - const U32 rleMaxLength = 25; - size_t cSize; -@@ -3687,11 +4308,15 @@ ZSTD_compressBlock_internal(ZSTD_CCtx* z - - { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); - FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); -- if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } -+ if (bss == ZSTDbss_noCompress) { -+ RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block"); -+ cSize = 0; -+ goto out; -+ } - } - - if (zc->seqCollector.collectSequences) { -- ZSTD_copyBlockSequences(zc); -+ FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, ZSTD_getSeqStore(zc), zc->blockState.prevCBlock->rep), "copyBlockSequences failed"); - ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); - return 0; - } -@@ -3702,7 +4327,7 @@ ZSTD_compressBlock_internal(ZSTD_CCtx* z - &zc->appliedParams, - dst, dstCapacity, - srcSize, -- zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, -+ zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */, - zc->bmi2); - - if (frame && -@@ -3767,10 +4392,11 @@ static size_t ZSTD_compressBlock_targetC - * * cSize >= blockBound(srcSize): We have expanded the block too much so - * emit an uncompressed block. 
- */ -- { -- size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); -+ { size_t const cSize = -+ ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); - if (cSize != ERROR(dstSize_tooSmall)) { -- size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); -+ size_t const maxCSize = -+ srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); - FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); - if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { - ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); -@@ -3778,7 +4404,7 @@ static size_t ZSTD_compressBlock_targetC - } - } - } -- } -+ } /* if (bss == ZSTDbss_compress)*/ - - DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); - /* Superblock compression failed, attempt to emit a single no compress block. -@@ -3807,7 +4433,7 @@ static size_t ZSTD_compressBlock_targetC - return cSize; - } - --static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, -+static void ZSTD_overflowCorrectIfNeeded(ZSTD_MatchState_t* ms, - ZSTD_cwksp* ws, - ZSTD_CCtx_params const* params, - void const* ip, -@@ -3831,39 +4457,82 @@ static void ZSTD_overflowCorrectIfNeeded - } - } - -+#include "zstd_preSplit.h" -+ -+static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t srcSize, size_t blockSizeMax, int splitLevel, ZSTD_strategy strat, S64 savings) -+{ -+ /* split level based on compression strategy, from `fast` to `btultra2` */ -+ static const int splitLevels[] = { 0, 0, 1, 2, 2, 3, 3, 4, 4, 4 }; -+ /* note: conservatively only split full blocks (128 KB) currently. -+ * While it's possible to go lower, let's keep it simple for a first implementation. -+ * Besides, benefits of splitting are reduced when blocks are already small. -+ */ -+ if (srcSize < 128 KB || blockSizeMax < 128 KB) -+ return MIN(srcSize, blockSizeMax); -+ /* do not split incompressible data though: -+ * require verified savings to allow pre-splitting. -+ * Note: as a consequence, the first full block is not split. -+ */ -+ if (savings < 3) { -+ DEBUGLOG(6, "don't attempt splitting: savings (%i) too low", (int)savings); -+ return 128 KB; -+ } -+ /* apply @splitLevel, or use default value (which depends on @strat). -+ * note that splitting heuristic is still conditioned by @savings >= 3, -+ * so the first block will not reach this code path */ -+ if (splitLevel == 1) return 128 KB; -+ if (splitLevel == 0) { -+ assert(ZSTD_fast <= strat && strat <= ZSTD_btultra2); -+ splitLevel = splitLevels[strat]; -+ } else { -+ assert(2 <= splitLevel && splitLevel <= 6); -+ splitLevel -= 2; -+ } -+ return ZSTD_splitBlock(src, blockSizeMax, splitLevel, cctx->tmpWorkspace, cctx->tmpWkspSize); -+} -+ - /*! ZSTD_compress_frameChunk() : - * Compress a chunk of data into one or multiple blocks. - * All blocks will be terminated, all input will be consumed. - * Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. 
- * Frame is supposed already started (header already produced) --* @return : compressed size, or an error code -+* @return : compressed size, or an error code - */ - static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - U32 lastFrameChunk) - { -- size_t blockSize = cctx->blockSize; -+ size_t blockSizeMax = cctx->blockSizeMax; - size_t remaining = srcSize; - const BYTE* ip = (const BYTE*)src; - BYTE* const ostart = (BYTE*)dst; - BYTE* op = ostart; - U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; -+ S64 savings = (S64)cctx->consumedSrcSize - (S64)cctx->producedCSize; - - assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); - -- DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); -+ DEBUGLOG(5, "ZSTD_compress_frameChunk (srcSize=%u, blockSizeMax=%u)", (unsigned)srcSize, (unsigned)blockSizeMax); - if (cctx->appliedParams.fParams.checksumFlag && srcSize) - xxh64_update(&cctx->xxhState, src, srcSize); - - while (remaining) { -- ZSTD_matchState_t* const ms = &cctx->blockState.matchState; -- U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); -- -- RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, -+ ZSTD_MatchState_t* const ms = &cctx->blockState.matchState; -+ size_t const blockSize = ZSTD_optimalBlockSize(cctx, -+ ip, remaining, -+ blockSizeMax, -+ cctx->appliedParams.preBlockSplitter_level, -+ cctx->appliedParams.cParams.strategy, -+ savings); -+ U32 const lastBlock = lastFrameChunk & (blockSize == remaining); -+ assert(blockSize <= remaining); -+ -+ /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding -+ * additional 1. We need to revisit and change this logic to be more consistent */ -+ RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE + 1, - dstSize_tooSmall, - "not enough space to store compressed block"); -- if (remaining < blockSize) blockSize = remaining; - - ZSTD_overflowCorrectIfNeeded( - ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); -@@ -3899,8 +4568,23 @@ static size_t ZSTD_compress_frameChunk(Z - MEM_writeLE24(op, cBlockHeader); - cSize += ZSTD_blockHeaderSize; - } -- } -+ } /* if (ZSTD_useTargetCBlockSize(&cctx->appliedParams))*/ - -+ /* @savings is employed to ensure that splitting doesn't worsen expansion of incompressible data. -+ * Without splitting, the maximum expansion is 3 bytes per full block. -+ * An adversarial input could attempt to fudge the split detector, -+ * and make it split incompressible data, resulting in more block headers. -+ * Note that, since ZSTD_COMPRESSBOUND() assumes a worst case scenario of 1KB per block, -+ * and the splitter never creates blocks that small (current lower limit is 8 KB), -+ * there is already no risk to expand beyond ZSTD_COMPRESSBOUND() limit. -+ * But if the goal is to not expand by more than 3-bytes per 128 KB full block, -+ * then yes, it becomes possible to make the block splitter oversplit incompressible data. -+ * Using @savings, we enforce an even more conservative condition, -+ * requiring the presence of enough savings (at least 3 bytes) to authorize splitting, -+ * otherwise only full blocks are used. 
-+ * But being conservative is fine, -+ * since splitting barely compressible blocks is not fruitful anyway */ -+ savings += (S64)blockSize - (S64)cSize; - - ip += blockSize; - assert(remaining >= blockSize); -@@ -3919,8 +4603,10 @@ static size_t ZSTD_compress_frameChunk(Z - - - static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, -- const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) --{ BYTE* const op = (BYTE*)dst; -+ const ZSTD_CCtx_params* params, -+ U64 pledgedSrcSize, U32 dictID) -+{ -+ BYTE* const op = (BYTE*)dst; - U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ - U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ - U32 const checksumFlag = params->fParams.checksumFlag>0; -@@ -4001,19 +4687,15 @@ size_t ZSTD_writeLastEmptyBlock(void* ds - } - } - --size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) -+void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) - { -- RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong, -- "wrong cctx stage"); -- RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable, -- parameter_unsupported, -- "incompatible with ldm"); -+ assert(cctx->stage == ZSTDcs_init); -+ assert(nbSeq == 0 || cctx->appliedParams.ldmParams.enableLdm != ZSTD_ps_enable); - cctx->externSeqStore.seq = seq; - cctx->externSeqStore.size = nbSeq; - cctx->externSeqStore.capacity = nbSeq; - cctx->externSeqStore.pos = 0; - cctx->externSeqStore.posInSequence = 0; -- return 0; - } - - -@@ -4022,7 +4704,7 @@ static size_t ZSTD_compressContinue_inte - const void* src, size_t srcSize, - U32 frame, U32 lastFrameChunk) - { -- ZSTD_matchState_t* const ms = &cctx->blockState.matchState; -+ ZSTD_MatchState_t* const ms = &cctx->blockState.matchState; - size_t fhSize = 0; - - DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", -@@ -4057,7 +4739,7 @@ static size_t ZSTD_compressContinue_inte - src, (BYTE const*)src + srcSize); - } - -- DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); -+ DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSizeMax); - { size_t const cSize = frame ? 
- ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : - ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); -@@ -4078,58 +4760,90 @@ static size_t ZSTD_compressContinue_inte - } - } - --size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, -- void* dst, size_t dstCapacity, -- const void* src, size_t srcSize) -+size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx, -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize) - { - DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize); - return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); - } - -+/* NOTE: Must just wrap ZSTD_compressContinue_public() */ -+size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize) -+{ -+ return ZSTD_compressContinue_public(cctx, dst, dstCapacity, src, srcSize); -+} - --size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) -+static size_t ZSTD_getBlockSize_deprecated(const ZSTD_CCtx* cctx) - { - ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; - assert(!ZSTD_checkCParams(cParams)); -- return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); -+ return MIN(cctx->appliedParams.maxBlockSize, (size_t)1 << cParams.windowLog); - } - --size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) -+/* NOTE: Must just wrap ZSTD_getBlockSize_deprecated() */ -+size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) -+{ -+ return ZSTD_getBlockSize_deprecated(cctx); -+} -+ -+/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */ -+size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) - { - DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); -- { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); -+ { size_t const blockSizeMax = ZSTD_getBlockSize_deprecated(cctx); - RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); } - - return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); - } - -+/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */ -+size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) -+{ -+ return ZSTD_compressBlock_deprecated(cctx, dst, dstCapacity, src, srcSize); -+} -+ - /*! 
ZSTD_loadDictionaryContent() : - * @return : 0, or an error code - */ --static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, -- ldmState_t* ls, -- ZSTD_cwksp* ws, -- ZSTD_CCtx_params const* params, -- const void* src, size_t srcSize, -- ZSTD_dictTableLoadMethod_e dtlm) -+static size_t -+ZSTD_loadDictionaryContent(ZSTD_MatchState_t* ms, -+ ldmState_t* ls, -+ ZSTD_cwksp* ws, -+ ZSTD_CCtx_params const* params, -+ const void* src, size_t srcSize, -+ ZSTD_dictTableLoadMethod_e dtlm, -+ ZSTD_tableFillPurpose_e tfp) - { - const BYTE* ip = (const BYTE*) src; - const BYTE* const iend = ip + srcSize; - int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL; - -- /* Assert that we the ms params match the params we're being given */ -+ /* Assert that the ms params match the params we're being given */ - ZSTD_assertEqualCParams(params->cParams, ms->cParams); - -- if (srcSize > ZSTD_CHUNKSIZE_MAX) { -+ { /* Ensure large dictionaries can't cause index overflow */ -+ - /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX. - * Dictionaries right at the edge will immediately trigger overflow - * correction, but I don't want to insert extra constraints here. - */ -- U32 const maxDictSize = ZSTD_CURRENT_MAX - 1; -- /* We must have cleared our windows when our source is this large. */ -- assert(ZSTD_window_isEmpty(ms->window)); -- if (loadLdmDict) -- assert(ZSTD_window_isEmpty(ls->window)); -+ U32 maxDictSize = ZSTD_CURRENT_MAX - ZSTD_WINDOW_START_INDEX; -+ -+ int const CDictTaggedIndices = ZSTD_CDictIndicesAreTagged(¶ms->cParams); -+ if (CDictTaggedIndices && tfp == ZSTD_tfp_forCDict) { -+ /* Some dictionary matchfinders in zstd use "short cache", -+ * which treats the lower ZSTD_SHORT_CACHE_TAG_BITS of each -+ * CDict hashtable entry as a tag rather than as part of an index. -+ * When short cache is used, we need to truncate the dictionary -+ * so that its indices don't overlap with the tag. */ -+ U32 const shortCacheMaxDictSize = (1u << (32 - ZSTD_SHORT_CACHE_TAG_BITS)) - ZSTD_WINDOW_START_INDEX; -+ maxDictSize = MIN(maxDictSize, shortCacheMaxDictSize); -+ assert(!loadLdmDict); -+ } -+ - /* If the dictionary is too large, only load the suffix of the dictionary. */ - if (srcSize > maxDictSize) { - ip = iend - maxDictSize; -@@ -4138,35 +4852,59 @@ static size_t ZSTD_loadDictionaryContent - } - } - -- DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder); -+ if (srcSize > ZSTD_CHUNKSIZE_MAX) { -+ /* We must have cleared our windows when our source is this large. */ -+ assert(ZSTD_window_isEmpty(ms->window)); -+ if (loadLdmDict) assert(ZSTD_window_isEmpty(ls->window)); -+ } - ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0); -- ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); -- ms->forceNonContiguous = params->deterministicRefPrefix; - -- if (loadLdmDict) { -+ DEBUGLOG(4, "ZSTD_loadDictionaryContent: useRowMatchFinder=%d", (int)params->useRowMatchFinder); -+ -+ if (loadLdmDict) { /* Load the entire dict into LDM matchfinders. */ -+ DEBUGLOG(4, "ZSTD_loadDictionaryContent: Trigger loadLdmDict"); - ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0); - ls->loadedDictEnd = params->forceWindow ? 
0 : (U32)(iend - ls->window.base); -+ ZSTD_ldm_fillHashTable(ls, ip, iend, ¶ms->ldmParams); -+ DEBUGLOG(4, "ZSTD_loadDictionaryContent: ZSTD_ldm_fillHashTable completes"); - } - -+ /* If the dict is larger than we can reasonably index in our tables, only load the suffix. */ -+ { U32 maxDictSize = 1U << MIN(MAX(params->cParams.hashLog + 3, params->cParams.chainLog + 1), 31); -+ if (srcSize > maxDictSize) { -+ ip = iend - maxDictSize; -+ src = ip; -+ srcSize = maxDictSize; -+ } -+ } -+ -+ ms->nextToUpdate = (U32)(ip - ms->window.base); -+ ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); -+ ms->forceNonContiguous = params->deterministicRefPrefix; -+ - if (srcSize <= HASH_READ_SIZE) return 0; - - ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend); - -- if (loadLdmDict) -- ZSTD_ldm_fillHashTable(ls, ip, iend, ¶ms->ldmParams); -- - switch(params->cParams.strategy) - { - case ZSTD_fast: -- ZSTD_fillHashTable(ms, iend, dtlm); -+ ZSTD_fillHashTable(ms, iend, dtlm, tfp); - break; - case ZSTD_dfast: -- ZSTD_fillDoubleHashTable(ms, iend, dtlm); -+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR -+ ZSTD_fillDoubleHashTable(ms, iend, dtlm, tfp); -+#else -+ assert(0); /* shouldn't be called: cparams should've been adjusted. */ -+#endif - break; - - case ZSTD_greedy: - case ZSTD_lazy: - case ZSTD_lazy2: -+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) - assert(srcSize >= HASH_READ_SIZE); - if (ms->dedicatedDictSearch) { - assert(ms->chainTable != NULL); -@@ -4174,7 +4912,7 @@ static size_t ZSTD_loadDictionaryContent - } else { - assert(params->useRowMatchFinder != ZSTD_ps_auto); - if (params->useRowMatchFinder == ZSTD_ps_enable) { -- size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16); -+ size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog); - ZSTD_memset(ms->tagTable, 0, tagTableSize); - ZSTD_row_update(ms, iend-HASH_READ_SIZE); - DEBUGLOG(4, "Using row-based hash table for lazy dict"); -@@ -4183,14 +4921,24 @@ static size_t ZSTD_loadDictionaryContent - DEBUGLOG(4, "Using chain-based hash table for lazy dict"); - } - } -+#else -+ assert(0); /* shouldn't be called: cparams should've been adjusted. */ -+#endif - break; - - case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ - case ZSTD_btopt: - case ZSTD_btultra: - case ZSTD_btultra2: -+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) - assert(srcSize >= HASH_READ_SIZE); -+ DEBUGLOG(4, "Fill %u bytes into the Binary Tree", (unsigned)srcSize); - ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); -+#else -+ assert(0); /* shouldn't be called: cparams should've been adjusted. */ -+#endif - break; - - default: -@@ -4233,20 +4981,19 @@ size_t ZSTD_loadCEntropy(ZSTD_compressed - { unsigned maxSymbolValue = 255; - unsigned hasZeroWeights = 1; - size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, -- dictEnd-dictPtr, &hasZeroWeights); -+ (size_t)(dictEnd-dictPtr), &hasZeroWeights); - - /* We only set the loaded table as valid if it contains all non-zero - * weights. 
Otherwise, we set it to check */ -- if (!hasZeroWeights) -+ if (!hasZeroWeights && maxSymbolValue == 255) - bs->entropy.huf.repeatMode = HUF_repeat_valid; - - RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); -- RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, ""); - dictPtr += hufHeaderSize; - } - - { unsigned offcodeLog; -- size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); -+ size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr)); - RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); - /* fill all offset symbols to avoid garbage at end of table */ -@@ -4261,7 +5008,7 @@ size_t ZSTD_loadCEntropy(ZSTD_compressed - - { short matchlengthNCount[MaxML+1]; - unsigned matchlengthMaxValue = MaxML, matchlengthLog; -- size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); -+ size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); - RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); - RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( -@@ -4275,7 +5022,7 @@ size_t ZSTD_loadCEntropy(ZSTD_compressed - - { short litlengthNCount[MaxLL+1]; - unsigned litlengthMaxValue = MaxLL, litlengthLog; -- size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); -+ size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); - RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); - RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( -@@ -4309,7 +5056,7 @@ size_t ZSTD_loadCEntropy(ZSTD_compressed - RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, ""); - } } } - -- return dictPtr - (const BYTE*)dict; -+ return (size_t)(dictPtr - (const BYTE*)dict); - } - - /* Dictionary format : -@@ -4322,11 +5069,12 @@ size_t ZSTD_loadCEntropy(ZSTD_compressed - * dictSize supposed >= 8 - */ - static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, -- ZSTD_matchState_t* ms, -+ ZSTD_MatchState_t* ms, - ZSTD_cwksp* ws, - ZSTD_CCtx_params const* params, - const void* dict, size_t dictSize, - ZSTD_dictTableLoadMethod_e dtlm, -+ ZSTD_tableFillPurpose_e tfp, - void* workspace) - { - const BYTE* dictPtr = (const BYTE*)dict; -@@ -4345,7 +5093,7 @@ static size_t ZSTD_loadZstdDictionary(ZS - { - size_t const dictContentSize = (size_t)(dictEnd - dictPtr); - FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( -- ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), ""); -+ ms, NULL, ws, params, dictPtr, dictContentSize, dtlm, tfp), ""); - } - return dictID; - } -@@ -4354,13 +5102,14 @@ static size_t ZSTD_loadZstdDictionary(ZS - * @return : dictID, or an error code */ - static size_t - ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, -- ZSTD_matchState_t* ms, -+ ZSTD_MatchState_t* ms, - ldmState_t* ls, - ZSTD_cwksp* ws, - const ZSTD_CCtx_params* params, - const void* dict, size_t dictSize, - ZSTD_dictContentType_e dictContentType, - 
ZSTD_dictTableLoadMethod_e dtlm, -+ ZSTD_tableFillPurpose_e tfp, - void* workspace) - { - DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); -@@ -4373,13 +5122,13 @@ ZSTD_compress_insertDictionary(ZSTD_comp - - /* dict restricted modes */ - if (dictContentType == ZSTD_dct_rawContent) -- return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm); -+ return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm, tfp); - - if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { - if (dictContentType == ZSTD_dct_auto) { - DEBUGLOG(4, "raw content dictionary detected"); - return ZSTD_loadDictionaryContent( -- ms, ls, ws, params, dict, dictSize, dtlm); -+ ms, ls, ws, params, dict, dictSize, dtlm, tfp); - } - RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); - assert(0); /* impossible */ -@@ -4387,13 +5136,14 @@ ZSTD_compress_insertDictionary(ZSTD_comp - - /* dict as full zstd dictionary */ - return ZSTD_loadZstdDictionary( -- bs, ms, ws, params, dict, dictSize, dtlm, workspace); -+ bs, ms, ws, params, dict, dictSize, dtlm, tfp, workspace); - } - - #define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) - #define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL) - - /*! ZSTD_compressBegin_internal() : -+ * Assumption : either @dict OR @cdict (or none) is non-NULL, never both - * @return : 0, or an error code */ - static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, - const void* dict, size_t dictSize, -@@ -4426,11 +5176,11 @@ static size_t ZSTD_compressBegin_interna - cctx->blockState.prevCBlock, &cctx->blockState.matchState, - &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent, - cdict->dictContentSize, cdict->dictContentType, dtlm, -- cctx->entropyWorkspace) -+ ZSTD_tfp_forCCtx, cctx->tmpWorkspace) - : ZSTD_compress_insertDictionary( - cctx->blockState.prevCBlock, &cctx->blockState.matchState, - &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize, -- dictContentType, dtlm, cctx->entropyWorkspace); -+ dictContentType, dtlm, ZSTD_tfp_forCCtx, cctx->tmpWorkspace); - FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); - assert(dictID <= UINT_MAX); - cctx->dictID = (U32)dictID; -@@ -4471,11 +5221,11 @@ size_t ZSTD_compressBegin_advanced(ZSTD_ - &cctxParams, pledgedSrcSize); - } - --size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) -+static size_t -+ZSTD_compressBegin_usingDict_deprecated(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) - { - ZSTD_CCtx_params cctxParams; -- { -- ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); -+ { ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); - ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, (compressionLevel == 0) ? 
ZSTD_CLEVEL_DEFAULT : compressionLevel); - } - DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); -@@ -4483,9 +5233,15 @@ size_t ZSTD_compressBegin_usingDict(ZSTD - &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); - } - -+size_t -+ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) -+{ -+ return ZSTD_compressBegin_usingDict_deprecated(cctx, dict, dictSize, compressionLevel); -+} -+ - size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) - { -- return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); -+ return ZSTD_compressBegin_usingDict_deprecated(cctx, NULL, 0, compressionLevel); - } - - -@@ -4496,14 +5252,13 @@ static size_t ZSTD_writeEpilogue(ZSTD_CC - { - BYTE* const ostart = (BYTE*)dst; - BYTE* op = ostart; -- size_t fhSize = 0; - - DEBUGLOG(4, "ZSTD_writeEpilogue"); - RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); - - /* special case : empty frame */ - if (cctx->stage == ZSTDcs_init) { -- fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); -+ size_t fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); - FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); - dstCapacity -= fhSize; - op += fhSize; -@@ -4513,8 +5268,9 @@ static size_t ZSTD_writeEpilogue(ZSTD_CC - if (cctx->stage != ZSTDcs_ending) { - /* write one last empty block, make it the "last" block */ - U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; -- RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue"); -- MEM_writeLE32(op, cBlockHeader24); -+ ZSTD_STATIC_ASSERT(ZSTD_BLOCKHEADERSIZE == 3); -+ RETURN_ERROR_IF(dstCapacity<3, dstSize_tooSmall, "no room for epilogue"); -+ MEM_writeLE24(op, cBlockHeader24); - op += ZSTD_blockHeaderSize; - dstCapacity -= ZSTD_blockHeaderSize; - } -@@ -4528,7 +5284,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CC - } - - cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ -- return op-ostart; -+ return (size_t)(op-ostart); - } - - void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize) -@@ -4537,9 +5293,9 @@ void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, si - (void)extraCSize; - } - --size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, -- void* dst, size_t dstCapacity, -- const void* src, size_t srcSize) -+size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx, -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize) - { - size_t endResult; - size_t const cSize = ZSTD_compressContinue_internal(cctx, -@@ -4563,6 +5319,14 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx - return cSize + endResult; - } - -+/* NOTE: Must just wrap ZSTD_compressEnd_public() */ -+size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize) -+{ -+ return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize); -+} -+ - size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, -@@ -4591,7 +5355,7 @@ size_t ZSTD_compress_advanced_internal( - FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, - dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, - params, srcSize, ZSTDb_not_buffered) , ""); -- return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); -+ return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize); - } - - size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, -@@ -4709,7 +5473,7 @@ static size_t ZSTD_initCDict_internal( - { size_t const dictID = 
ZSTD_compress_insertDictionary( - &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace, - ¶ms, cdict->dictContent, cdict->dictContentSize, -- dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace); -+ dictContentType, ZSTD_dtlm_full, ZSTD_tfp_forCDict, cdict->entropyWorkspace); - FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); - assert(dictID <= (size_t)(U32)-1); - cdict->dictID = (U32)dictID; -@@ -4719,14 +5483,16 @@ static size_t ZSTD_initCDict_internal( - return 0; - } - --static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, -- ZSTD_dictLoadMethod_e dictLoadMethod, -- ZSTD_compressionParameters cParams, -- ZSTD_paramSwitch_e useRowMatchFinder, -- U32 enableDedicatedDictSearch, -- ZSTD_customMem customMem) -+static ZSTD_CDict* -+ZSTD_createCDict_advanced_internal(size_t dictSize, -+ ZSTD_dictLoadMethod_e dictLoadMethod, -+ ZSTD_compressionParameters cParams, -+ ZSTD_ParamSwitch_e useRowMatchFinder, -+ int enableDedicatedDictSearch, -+ ZSTD_customMem customMem) - { - if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; -+ DEBUGLOG(3, "ZSTD_createCDict_advanced_internal (dictSize=%u)", (unsigned)dictSize); - - { size_t const workspaceSize = - ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + -@@ -4763,6 +5529,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced(co - { - ZSTD_CCtx_params cctxParams; - ZSTD_memset(&cctxParams, 0, sizeof(cctxParams)); -+ DEBUGLOG(3, "ZSTD_createCDict_advanced, dictSize=%u, mode=%u", (unsigned)dictSize, (unsigned)dictContentType); - ZSTD_CCtxParams_init(&cctxParams, 0); - cctxParams.cParams = cParams; - cctxParams.customMem = customMem; -@@ -4783,7 +5550,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced2( - ZSTD_compressionParameters cParams; - ZSTD_CDict* cdict; - -- DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType); -+ DEBUGLOG(3, "ZSTD_createCDict_advanced2, dictSize=%u, mode=%u", (unsigned)dictSize, (unsigned)dictContentType); - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; - - if (cctxParams.enableDedicatedDictSearch) { -@@ -4802,7 +5569,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced2( - &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); - } - -- DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch); -+ DEBUGLOG(3, "ZSTD_createCDict_advanced2: DedicatedDictSearch=%u", cctxParams.enableDedicatedDictSearch); - cctxParams.cParams = cParams; - cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); - -@@ -4813,7 +5580,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced2( - if (!cdict) - return NULL; - -- if (ZSTD_isError( ZSTD_initCDict_internal(cdict, -+ if (!cdict || ZSTD_isError( ZSTD_initCDict_internal(cdict, - dict, dictSize, - dictLoadMethod, dictContentType, - cctxParams) )) { -@@ -4867,7 +5634,7 @@ size_t ZSTD_freeCDict(ZSTD_CDict* cdict) - * workspaceSize: Use ZSTD_estimateCDictSize() - * to determine how large workspace must be. - * cParams : use ZSTD_getCParams() to transform a compression level -- * into its relevants cParams. -+ * into its relevant cParams. - * @return : pointer to ZSTD_CDict*, or NULL if error (size too small) - * Note : there is no corresponding "free" function. - * Since workspace was allocated externally, it must be freed externally. 
-@@ -4879,7 +5646,7 @@ const ZSTD_CDict* ZSTD_initStaticCDict( - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams) - { -- ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams); -+ ZSTD_ParamSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams); - /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */ - size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0); - size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) -@@ -4890,6 +5657,7 @@ const ZSTD_CDict* ZSTD_initStaticCDict( - ZSTD_CDict* cdict; - ZSTD_CCtx_params params; - -+ DEBUGLOG(4, "ZSTD_initStaticCDict (dictSize==%u)", (unsigned)dictSize); - if ((size_t)workspace & 7) return NULL; /* 8-aligned */ - - { -@@ -4900,14 +5668,13 @@ const ZSTD_CDict* ZSTD_initStaticCDict( - ZSTD_cwksp_move(&cdict->workspace, &ws); - } - -- DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", -- (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); - if (workspaceSize < neededSize) return NULL; - - ZSTD_CCtxParams_init(¶ms, 0); - params.cParams = cParams; - params.useRowMatchFinder = useRowMatchFinder; - cdict->useRowMatchFinder = useRowMatchFinder; -+ cdict->compressionLevel = ZSTD_NO_CLEVEL; - - if (ZSTD_isError( ZSTD_initCDict_internal(cdict, - dict, dictSize, -@@ -4987,12 +5754,17 @@ size_t ZSTD_compressBegin_usingCDict_adv - - /* ZSTD_compressBegin_usingCDict() : - * cdict must be != NULL */ --size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) -+size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) - { - ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); - } - -+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) -+{ -+ return ZSTD_compressBegin_usingCDict_deprecated(cctx, cdict); -+} -+ - /*! ZSTD_compress_usingCDict_internal(): - * Implementation of various ZSTD_compress_usingCDict* functions. - */ -@@ -5002,7 +5774,7 @@ static size_t ZSTD_compress_usingCDict_i - const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) - { - FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ -- return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); -+ return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize); - } - - /*! 
ZSTD_compress_usingCDict_advanced(): -@@ -5068,7 +5840,7 @@ size_t ZSTD_CStreamOutSize(void) - return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; - } - --static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize) -+static ZSTD_CParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize) - { - if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) - return ZSTD_cpm_attachDict; -@@ -5199,30 +5971,41 @@ size_t ZSTD_initCStream(ZSTD_CStream* zc - - static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) - { -- size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; -- if (hintInSize==0) hintInSize = cctx->blockSize; -- return hintInSize; -+ if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { -+ return cctx->blockSizeMax - cctx->stableIn_notConsumed; -+ } -+ assert(cctx->appliedParams.inBufferMode == ZSTD_bm_buffered); -+ { size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; -+ if (hintInSize==0) hintInSize = cctx->blockSizeMax; -+ return hintInSize; -+ } - } - - /* ZSTD_compressStream_generic(): - * internal function for all *compressStream*() variants -- * non-static, because can be called from zstdmt_compress.c -- * @return : hint size for next input */ -+ * @return : hint size for next input to complete ongoing block */ - static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, - ZSTD_EndDirective const flushMode) - { -- const char* const istart = (const char*)input->src; -- const char* const iend = input->size != 0 ? istart + input->size : istart; -- const char* ip = input->pos != 0 ? istart + input->pos : istart; -- char* const ostart = (char*)output->dst; -- char* const oend = output->size != 0 ? ostart + output->size : ostart; -- char* op = output->pos != 0 ? ostart + output->pos : ostart; -+ const char* const istart = (assert(input != NULL), (const char*)input->src); -+ const char* const iend = (istart != NULL) ? istart + input->size : istart; -+ const char* ip = (istart != NULL) ? istart + input->pos : istart; -+ char* const ostart = (assert(output != NULL), (char*)output->dst); -+ char* const oend = (ostart != NULL) ? ostart + output->size : ostart; -+ char* op = (ostart != NULL) ? 
ostart + output->pos : ostart; - U32 someMoreWork = 1; - - /* check expectations */ -- DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode); -+ DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%i, srcSize = %zu", (int)flushMode, input->size - input->pos); -+ assert(zcs != NULL); -+ if (zcs->appliedParams.inBufferMode == ZSTD_bm_stable) { -+ assert(input->pos >= zcs->stableIn_notConsumed); -+ input->pos -= zcs->stableIn_notConsumed; -+ if (ip) ip -= zcs->stableIn_notConsumed; -+ zcs->stableIn_notConsumed = 0; -+ } - if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { - assert(zcs->inBuff != NULL); - assert(zcs->inBuffSize > 0); -@@ -5231,8 +6014,10 @@ static size_t ZSTD_compressStream_generi - assert(zcs->outBuff != NULL); - assert(zcs->outBuffSize > 0); - } -- assert(output->pos <= output->size); -+ if (input->src == NULL) assert(input->size == 0); - assert(input->pos <= input->size); -+ if (output->dst == NULL) assert(output->size == 0); -+ assert(output->pos <= output->size); - assert((U32)flushMode <= (U32)ZSTD_e_end); - - while (someMoreWork) { -@@ -5243,12 +6028,13 @@ static size_t ZSTD_compressStream_generi - - case zcss_load: - if ( (flushMode == ZSTD_e_end) -- && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip) /* Enough output space */ -+ && ( (size_t)(oend-op) >= ZSTD_compressBound((size_t)(iend-ip)) /* Enough output space */ - || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */ - && (zcs->inBuffPos == 0) ) { - /* shortcut to compression pass directly into output buffer */ -- size_t const cSize = ZSTD_compressEnd(zcs, -- op, oend-op, ip, iend-ip); -+ size_t const cSize = ZSTD_compressEnd_public(zcs, -+ op, (size_t)(oend-op), -+ ip, (size_t)(iend-ip)); - DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); - FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed"); - ip = iend; -@@ -5262,10 +6048,9 @@ static size_t ZSTD_compressStream_generi - size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; - size_t const loaded = ZSTD_limitCopy( - zcs->inBuff + zcs->inBuffPos, toLoad, -- ip, iend-ip); -+ ip, (size_t)(iend-ip)); - zcs->inBuffPos += loaded; -- if (loaded != 0) -- ip += loaded; -+ if (ip) ip += loaded; - if ( (flushMode == ZSTD_e_continue) - && (zcs->inBuffPos < zcs->inBuffTarget) ) { - /* not enough input to fill full block : stop here */ -@@ -5276,16 +6061,29 @@ static size_t ZSTD_compressStream_generi - /* empty */ - someMoreWork = 0; break; - } -+ } else { -+ assert(zcs->appliedParams.inBufferMode == ZSTD_bm_stable); -+ if ( (flushMode == ZSTD_e_continue) -+ && ( (size_t)(iend - ip) < zcs->blockSizeMax) ) { -+ /* can't compress a full block : stop here */ -+ zcs->stableIn_notConsumed = (size_t)(iend - ip); -+ ip = iend; /* pretend to have consumed input */ -+ someMoreWork = 0; break; -+ } -+ if ( (flushMode == ZSTD_e_flush) -+ && (ip == iend) ) { -+ /* empty */ -+ someMoreWork = 0; break; -+ } - } - /* compress current block (note : this stage cannot be stopped in the middle) */ - DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); - { int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered); - void* cDst; - size_t cSize; -- size_t oSize = oend-op; -- size_t const iSize = inputBuffered -- ? zcs->inBuffPos - zcs->inToCompress -- : MIN((size_t)(iend - ip), zcs->blockSize); -+ size_t oSize = (size_t)(oend-op); -+ size_t const iSize = inputBuffered ? 
zcs->inBuffPos - zcs->inToCompress -+ : MIN((size_t)(iend - ip), zcs->blockSizeMax); - if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) - cDst = op; /* compress into output buffer, to skip flush stage */ - else -@@ -5293,34 +6091,31 @@ static size_t ZSTD_compressStream_generi - if (inputBuffered) { - unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); - cSize = lastBlock ? -- ZSTD_compressEnd(zcs, cDst, oSize, -+ ZSTD_compressEnd_public(zcs, cDst, oSize, - zcs->inBuff + zcs->inToCompress, iSize) : -- ZSTD_compressContinue(zcs, cDst, oSize, -+ ZSTD_compressContinue_public(zcs, cDst, oSize, - zcs->inBuff + zcs->inToCompress, iSize); - FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); - zcs->frameEnded = lastBlock; - /* prepare next block */ -- zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; -+ zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSizeMax; - if (zcs->inBuffTarget > zcs->inBuffSize) -- zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; -+ zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSizeMax; - DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", - (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); - if (!lastBlock) - assert(zcs->inBuffTarget <= zcs->inBuffSize); - zcs->inToCompress = zcs->inBuffPos; -- } else { -- unsigned const lastBlock = (ip + iSize == iend); -- assert(flushMode == ZSTD_e_end /* Already validated */); -+ } else { /* !inputBuffered, hence ZSTD_bm_stable */ -+ unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip + iSize == iend); - cSize = lastBlock ? -- ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) : -- ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize); -+ ZSTD_compressEnd_public(zcs, cDst, oSize, ip, iSize) : -+ ZSTD_compressContinue_public(zcs, cDst, oSize, ip, iSize); - /* Consume the input prior to error checking to mirror buffered mode. */ -- if (iSize > 0) -- ip += iSize; -+ if (ip) ip += iSize; - FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); - zcs->frameEnded = lastBlock; -- if (lastBlock) -- assert(ip == iend); -+ if (lastBlock) assert(ip == iend); - } - if (cDst == op) { /* no need to flush */ - op += cSize; -@@ -5369,8 +6164,8 @@ static size_t ZSTD_compressStream_generi - } - } - -- input->pos = ip - istart; -- output->pos = op - ostart; -+ input->pos = (size_t)(ip - istart); -+ output->pos = (size_t)(op - ostart); - if (zcs->frameEnded) return 0; - return ZSTD_nextInputSizeHint(zcs); - } -@@ -5390,8 +6185,10 @@ size_t ZSTD_compressStream(ZSTD_CStream* - /* After a compression call set the expected input/output buffer. - * This is validated at the start of the next compression call. 
- */ --static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input) -+static void -+ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, const ZSTD_outBuffer* output, const ZSTD_inBuffer* input) - { -+ DEBUGLOG(5, "ZSTD_setBufferExpectations (for advanced stable in/out modes)"); - if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { - cctx->expectedInBuffer = *input; - } -@@ -5410,22 +6207,27 @@ static size_t ZSTD_checkBufferStability( - { - if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { - ZSTD_inBuffer const expect = cctx->expectedInBuffer; -- if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size) -- RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!"); -- if (endOp != ZSTD_e_end) -- RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!"); -+ if (expect.src != input->src || expect.pos != input->pos) -+ RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableInBuffer enabled but input differs!"); - } -+ (void)endOp; - if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { - size_t const outBufferSize = output->size - output->pos; - if (cctx->expectedOutBufferSize != outBufferSize) -- RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!"); -+ RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableOutBuffer enabled but output size differs!"); - } - return 0; - } - -+/* -+ * If @endOp == ZSTD_e_end, @inSize becomes pledgedSrcSize. -+ * Otherwise, it's ignored. -+ * @return: 0 on success, or a ZSTD_error code otherwise. -+ */ - static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, - ZSTD_EndDirective endOp, -- size_t inSize) { -+ size_t inSize) -+{ - ZSTD_CCtx_params params = cctx->requestedParams; - ZSTD_prefixDict const prefixDict = cctx->prefixDict; - FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */ -@@ -5438,21 +6240,24 @@ static size_t ZSTD_CCtx_init_compressStr - */ - params.compressionLevel = cctx->cdict->compressionLevel; - } -- DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); -- if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-fix pledgedSrcSize */ -- { -- size_t const dictSize = prefixDict.dict -+ DEBUGLOG(4, "ZSTD_CCtx_init_compressStream2 : transparent init stage"); -+ if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-determine pledgedSrcSize */ -+ -+ { size_t const dictSize = prefixDict.dict - ? prefixDict.dictSize - : (cctx->cdict ? 
cctx->cdict->dictContentSize : 0); -- ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, ¶ms, cctx->pledgedSrcSizePlusOne - 1); -+ ZSTD_CParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, ¶ms, cctx->pledgedSrcSizePlusOne - 1); - params.cParams = ZSTD_getCParamsFromCCtxParams( - ¶ms, cctx->pledgedSrcSizePlusOne-1, - dictSize, mode); - } - -- params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, ¶ms.cParams); -+ params.postBlockSplitter = ZSTD_resolveBlockSplitterMode(params.postBlockSplitter, ¶ms.cParams); - params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, ¶ms.cParams); - params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, ¶ms.cParams); -+ params.validateSequences = ZSTD_resolveExternalSequenceValidation(params.validateSequences); -+ params.maxBlockSize = ZSTD_resolveMaxBlockSize(params.maxBlockSize); -+ params.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(params.searchForExternalRepcodes, params.compressionLevel); - - { U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1; - assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); -@@ -5468,7 +6273,7 @@ static size_t ZSTD_CCtx_init_compressStr - /* for small input: avoid automatic flush on reaching end of block, since - * it would require to add a 3-bytes null block to end frame - */ -- cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize); -+ cctx->inBuffTarget = cctx->blockSizeMax + (cctx->blockSizeMax == pledgedSrcSize); - } else { - cctx->inBuffTarget = 0; - } -@@ -5479,6 +6284,8 @@ static size_t ZSTD_CCtx_init_compressStr - return 0; - } - -+/* @return provides a minimum amount of data remaining to be flushed from internal buffers -+ */ - size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, -@@ -5493,8 +6300,27 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* - - /* transparent initialization stage */ - if (cctx->streamStage == zcss_init) { -- FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed"); -- ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */ -+ size_t const inputSize = input->size - input->pos; /* no obligation to start from pos==0 */ -+ size_t const totalInputSize = inputSize + cctx->stableIn_notConsumed; -+ if ( (cctx->requestedParams.inBufferMode == ZSTD_bm_stable) /* input is presumed stable, across invocations */ -+ && (endOp == ZSTD_e_continue) /* no flush requested, more input to come */ -+ && (totalInputSize < ZSTD_BLOCKSIZE_MAX) ) { /* not even reached one block yet */ -+ if (cctx->stableIn_notConsumed) { /* not the first time */ -+ /* check stable source guarantees */ -+ RETURN_ERROR_IF(input->src != cctx->expectedInBuffer.src, stabilityCondition_notRespected, "stableInBuffer condition not respected: wrong src pointer"); -+ RETURN_ERROR_IF(input->pos != cctx->expectedInBuffer.size, stabilityCondition_notRespected, "stableInBuffer condition not respected: externally modified pos"); -+ } -+ /* pretend input was consumed, to give a sense forward progress */ -+ input->pos = input->size; -+ /* save stable inBuffer, for later control, and flush/end */ -+ cctx->expectedInBuffer = *input; -+ /* but actually input wasn't consumed, so keep track of position from where compression shall resume */ -+ cctx->stableIn_notConsumed += inputSize; -+ /* don't initialize yet, wait for the first block of flush() order, for better 
parameters adaptation */ -+ return ZSTD_FRAMEHEADERSIZE_MIN(cctx->requestedParams.format); /* at least some header to produce */ -+ } -+ FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, totalInputSize), "compressStream2 initialization failed"); -+ ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */ - } - /* end of transparent initialization stage */ - -@@ -5512,13 +6338,20 @@ size_t ZSTD_compressStream2_simpleArgs ( - const void* src, size_t srcSize, size_t* srcPos, - ZSTD_EndDirective endOp) - { -- ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; -- ZSTD_inBuffer input = { src, srcSize, *srcPos }; -+ ZSTD_outBuffer output; -+ ZSTD_inBuffer input; -+ output.dst = dst; -+ output.size = dstCapacity; -+ output.pos = *dstPos; -+ input.src = src; -+ input.size = srcSize; -+ input.pos = *srcPos; - /* ZSTD_compressStream2() will check validity of dstPos and srcPos */ -- size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); -- *dstPos = output.pos; -- *srcPos = input.pos; -- return cErr; -+ { size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); -+ *dstPos = output.pos; -+ *srcPos = input.pos; -+ return cErr; -+ } - } - - size_t ZSTD_compress2(ZSTD_CCtx* cctx, -@@ -5541,6 +6374,7 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx, - /* Reset to the original values. */ - cctx->requestedParams.inBufferMode = originalInBufferMode; - cctx->requestedParams.outBufferMode = originalOutBufferMode; -+ - FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed"); - if (result != 0) { /* compression not completed, due to lack of output space */ - assert(oPos == dstCapacity); -@@ -5551,64 +6385,67 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx, - } - } - --typedef struct { -- U32 idx; /* Index in array of ZSTD_Sequence */ -- U32 posInSequence; /* Position within sequence at idx */ -- size_t posInSrc; /* Number of bytes given by sequences provided so far */ --} ZSTD_sequencePosition; -- - /* ZSTD_validateSequence() : -- * @offCode : is presumed to follow format required by ZSTD_storeSeq() -+ * @offBase : must use the format required by ZSTD_storeSeq() - * @returns a ZSTD error code if sequence is not valid - */ - static size_t --ZSTD_validateSequence(U32 offCode, U32 matchLength, -- size_t posInSrc, U32 windowLog, size_t dictSize) -+ZSTD_validateSequence(U32 offBase, U32 matchLength, U32 minMatch, -+ size_t posInSrc, U32 windowLog, size_t dictSize, int useSequenceProducer) - { -- U32 const windowSize = 1 << windowLog; -+ U32 const windowSize = 1u << windowLog; - /* posInSrc represents the amount of data the decoder would decode up to this point. - * As long as the amount of data decoded is less than or equal to window size, offsets may be - * larger than the total length of output decoded in order to reference the dict, even larger than - * window size. After output surpasses windowSize, we're limited to windowSize offsets again. - */ - size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize; -- RETURN_ERROR_IF(offCode > STORE_OFFSET(offsetBound), corruption_detected, "Offset too large!"); -- RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small"); -+ size_t const matchLenLowerBound = (minMatch == 3 || useSequenceProducer) ? 
3 : 4; -+ RETURN_ERROR_IF(offBase > OFFSET_TO_OFFBASE(offsetBound), externalSequences_invalid, "Offset too large!"); -+ /* Validate maxNbSeq is large enough for the given matchLength and minMatch */ -+ RETURN_ERROR_IF(matchLength < matchLenLowerBound, externalSequences_invalid, "Matchlength too small for the minMatch"); - return 0; - } - - /* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */ --static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) -+static U32 ZSTD_finalizeOffBase(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) - { -- U32 offCode = STORE_OFFSET(rawOffset); -+ U32 offBase = OFFSET_TO_OFFBASE(rawOffset); - - if (!ll0 && rawOffset == rep[0]) { -- offCode = STORE_REPCODE_1; -+ offBase = REPCODE1_TO_OFFBASE; - } else if (rawOffset == rep[1]) { -- offCode = STORE_REPCODE(2 - ll0); -+ offBase = REPCODE_TO_OFFBASE(2 - ll0); - } else if (rawOffset == rep[2]) { -- offCode = STORE_REPCODE(3 - ll0); -+ offBase = REPCODE_TO_OFFBASE(3 - ll0); - } else if (ll0 && rawOffset == rep[0] - 1) { -- offCode = STORE_REPCODE_3; -+ offBase = REPCODE3_TO_OFFBASE; - } -- return offCode; -+ return offBase; - } - --/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of -- * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. -+/* This function scans through an array of ZSTD_Sequence, -+ * storing the sequences it reads, until it reaches a block delimiter. -+ * Note that the block delimiter includes the last literals of the block. -+ * @blockSize must be == sum(sequence_lengths). -+ * @returns @blockSize on success, and a ZSTD_error otherwise. - */ - static size_t --ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, -- ZSTD_sequencePosition* seqPos, -- const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, -- const void* src, size_t blockSize) -+ZSTD_transferSequences_wBlockDelim(ZSTD_CCtx* cctx, -+ ZSTD_SequencePosition* seqPos, -+ const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, -+ const void* src, size_t blockSize, -+ ZSTD_ParamSwitch_e externalRepSearch) - { - U32 idx = seqPos->idx; -+ U32 const startIdx = idx; - BYTE const* ip = (BYTE const*)(src); - const BYTE* const iend = ip + blockSize; -- repcodes_t updatedRepcodes; -+ Repcodes_t updatedRepcodes; - U32 dictSize; - -+ DEBUGLOG(5, "ZSTD_transferSequences_wBlockDelim (blockSize = %zu)", blockSize); -+ - if (cctx->cdict) { - dictSize = (U32)cctx->cdict->dictContentSize; - } else if (cctx->prefixDict.dict) { -@@ -5616,27 +6453,60 @@ ZSTD_copySequencesToSeqStoreExplicitBloc - } else { - dictSize = 0; - } -- ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); -- for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) { -+ ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(Repcodes_t)); -+ for (; idx < inSeqsSize && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) { - U32 const litLength = inSeqs[idx].litLength; -- U32 const ll0 = (litLength == 0); - U32 const matchLength = inSeqs[idx].matchLength; -- U32 const offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0); -- ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); -+ U32 offBase; -+ -+ if (externalRepSearch == ZSTD_ps_disable) { -+ offBase = OFFSET_TO_OFFBASE(inSeqs[idx].offset); -+ } else { -+ U32 const ll0 = (litLength == 0); -+ offBase = ZSTD_finalizeOffBase(inSeqs[idx].offset, 
updatedRepcodes.rep, ll0); -+ ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0); -+ } - -- DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); -+ DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength); - if (cctx->appliedParams.validateSequences) { - seqPos->posInSrc += litLength + matchLength; -- FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, -- cctx->appliedParams.cParams.windowLog, dictSize), -+ FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, -+ seqPos->posInSrc, -+ cctx->appliedParams.cParams.windowLog, dictSize, -+ ZSTD_hasExtSeqProd(&cctx->appliedParams)), - "Sequence validation failed"); - } -- RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, -+ RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid, - "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); -- ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength); -+ ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength); - ip += matchLength + litLength; - } -- ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); -+ RETURN_ERROR_IF(idx == inSeqsSize, externalSequences_invalid, "Block delimiter not found."); -+ -+ /* If we skipped repcode search while parsing, we need to update repcodes now */ -+ assert(externalRepSearch != ZSTD_ps_auto); -+ assert(idx >= startIdx); -+ if (externalRepSearch == ZSTD_ps_disable && idx != startIdx) { -+ U32* const rep = updatedRepcodes.rep; -+ U32 lastSeqIdx = idx - 1; /* index of last non-block-delimiter sequence */ -+ -+ if (lastSeqIdx >= startIdx + 2) { -+ rep[2] = inSeqs[lastSeqIdx - 2].offset; -+ rep[1] = inSeqs[lastSeqIdx - 1].offset; -+ rep[0] = inSeqs[lastSeqIdx].offset; -+ } else if (lastSeqIdx == startIdx + 1) { -+ rep[2] = rep[0]; -+ rep[1] = inSeqs[lastSeqIdx - 1].offset; -+ rep[0] = inSeqs[lastSeqIdx].offset; -+ } else { -+ assert(lastSeqIdx == startIdx); -+ rep[2] = rep[1]; -+ rep[1] = rep[0]; -+ rep[0] = inSeqs[lastSeqIdx].offset; -+ } -+ } -+ -+ ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t)); - - if (inSeqs[idx].litLength) { - DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength); -@@ -5644,37 +6514,43 @@ ZSTD_copySequencesToSeqStoreExplicitBloc - ip += inSeqs[idx].litLength; - seqPos->posInSrc += inSeqs[idx].litLength; - } -- RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!"); -+ RETURN_ERROR_IF(ip != iend, externalSequences_invalid, "Blocksize doesn't agree with block delimiter!"); - seqPos->idx = idx+1; -- return 0; -+ return blockSize; - } - --/* Returns the number of bytes to move the current read position back by. Only non-zero -- * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something -- * went wrong. -+/* -+ * This function attempts to scan through @blockSize bytes in @src -+ * represented by the sequences in @inSeqs, -+ * storing any (partial) sequences. - * -- * This function will attempt to scan through blockSize bytes represented by the sequences -- * in inSeqs, storing any (partial) sequences. -+ * Occasionally, we may want to reduce the actual number of bytes consumed from @src -+ * to avoid splitting a match, notably if it would produce a match smaller than MINMATCH. 
- * -- * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to -- * avoid splitting a match, or to avoid splitting a match such that it would produce a match -- * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. -+ * @returns the number of bytes consumed from @src, necessarily <= @blockSize. -+ * Otherwise, it may return a ZSTD error if something went wrong. - */ - static size_t --ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, -- const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, -- const void* src, size_t blockSize) -+ZSTD_transferSequences_noDelim(ZSTD_CCtx* cctx, -+ ZSTD_SequencePosition* seqPos, -+ const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, -+ const void* src, size_t blockSize, -+ ZSTD_ParamSwitch_e externalRepSearch) - { - U32 idx = seqPos->idx; - U32 startPosInSequence = seqPos->posInSequence; - U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize; - size_t dictSize; -- BYTE const* ip = (BYTE const*)(src); -- BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */ -- repcodes_t updatedRepcodes; -+ const BYTE* const istart = (const BYTE*)(src); -+ const BYTE* ip = istart; -+ const BYTE* iend = istart + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */ -+ Repcodes_t updatedRepcodes; - U32 bytesAdjustment = 0; - U32 finalMatchSplit = 0; - -+ /* TODO(embg) support fast parsing mode in noBlockDelim mode */ -+ (void)externalRepSearch; -+ - if (cctx->cdict) { - dictSize = cctx->cdict->dictContentSize; - } else if (cctx->prefixDict.dict) { -@@ -5682,15 +6558,15 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim - } else { - dictSize = 0; - } -- DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize); -+ DEBUGLOG(5, "ZSTD_transferSequences_noDelim: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize); - DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); -- ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); -+ ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(Repcodes_t)); - while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) { - const ZSTD_Sequence currSeq = inSeqs[idx]; - U32 litLength = currSeq.litLength; - U32 matchLength = currSeq.matchLength; - U32 const rawOffset = currSeq.offset; -- U32 offCode; -+ U32 offBase; - - /* Modify the sequence depending on where endPosInSequence lies */ - if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) { -@@ -5704,7 +6580,6 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim - /* Move to the next sequence */ - endPosInSequence -= currSeq.litLength + currSeq.matchLength; - startPosInSequence = 0; -- idx++; - } else { - /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence - does not reach the end of the match. 
So, we have to split the sequence */ -@@ -5744,58 +6619,113 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim - } - /* Check if this offset can be represented with a repcode */ - { U32 const ll0 = (litLength == 0); -- offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0); -- ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); -+ offBase = ZSTD_finalizeOffBase(rawOffset, updatedRepcodes.rep, ll0); -+ ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0); - } - - if (cctx->appliedParams.validateSequences) { - seqPos->posInSrc += litLength + matchLength; -- FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, -- cctx->appliedParams.cParams.windowLog, dictSize), -+ FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc, -+ cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)), - "Sequence validation failed"); - } -- DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); -- RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, -+ DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength); -+ RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid, - "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); -- ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength); -+ ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength); - ip += matchLength + litLength; -+ if (!finalMatchSplit) -+ idx++; /* Next Sequence */ - } - DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); - assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength); - seqPos->idx = idx; - seqPos->posInSequence = endPosInSequence; -- ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); -+ ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t)); - - iend -= bytesAdjustment; - if (ip != iend) { - /* Store any last literals */ -- U32 lastLLSize = (U32)(iend - ip); -+ U32 const lastLLSize = (U32)(iend - ip); - assert(ip <= iend); - DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize); - ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize); - seqPos->posInSrc += lastLLSize; - } - -- return bytesAdjustment; -+ return (size_t)(iend-istart); - } - --typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, -- const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, -- const void* src, size_t blockSize); --static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) -+/* @seqPos represents a position within @inSeqs, -+ * it is read and updated by this function, -+ * once the goal to produce a block of size @blockSize is reached. -+ * @return: nb of bytes consumed from @src, necessarily <= @blockSize. 
-+ */ -+typedef size_t (*ZSTD_SequenceCopier_f)(ZSTD_CCtx* cctx, -+ ZSTD_SequencePosition* seqPos, -+ const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, -+ const void* src, size_t blockSize, -+ ZSTD_ParamSwitch_e externalRepSearch); -+ -+static ZSTD_SequenceCopier_f ZSTD_selectSequenceCopier(ZSTD_SequenceFormat_e mode) - { -- ZSTD_sequenceCopier sequenceCopier = NULL; -- assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode)); -+ assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, (int)mode)); - if (mode == ZSTD_sf_explicitBlockDelimiters) { -- return ZSTD_copySequencesToSeqStoreExplicitBlockDelim; -- } else if (mode == ZSTD_sf_noBlockDelimiters) { -- return ZSTD_copySequencesToSeqStoreNoBlockDelim; -+ return ZSTD_transferSequences_wBlockDelim; - } -- assert(sequenceCopier != NULL); -- return sequenceCopier; -+ assert(mode == ZSTD_sf_noBlockDelimiters); -+ return ZSTD_transferSequences_noDelim; - } - --/* Compress, block-by-block, all of the sequences given. -+/* Discover the size of next block by searching for the delimiter. -+ * Note that a block delimiter **must** exist in this mode, -+ * otherwise it's an input error. -+ * The block size retrieved will be later compared to ensure it remains within bounds */ -+static size_t -+blockSize_explicitDelimiter(const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_SequencePosition seqPos) -+{ -+ int end = 0; -+ size_t blockSize = 0; -+ size_t spos = seqPos.idx; -+ DEBUGLOG(6, "blockSize_explicitDelimiter : seq %zu / %zu", spos, inSeqsSize); -+ assert(spos <= inSeqsSize); -+ while (spos < inSeqsSize) { -+ end = (inSeqs[spos].offset == 0); -+ blockSize += inSeqs[spos].litLength + inSeqs[spos].matchLength; -+ if (end) { -+ if (inSeqs[spos].matchLength != 0) -+ RETURN_ERROR(externalSequences_invalid, "delimiter format error : both matchlength and offset must be == 0"); -+ break; -+ } -+ spos++; -+ } -+ if (!end) -+ RETURN_ERROR(externalSequences_invalid, "Reached end of sequences without finding a block delimiter"); -+ return blockSize; -+} -+ -+static size_t determine_blockSize(ZSTD_SequenceFormat_e mode, -+ size_t blockSize, size_t remaining, -+ const ZSTD_Sequence* inSeqs, size_t inSeqsSize, -+ ZSTD_SequencePosition seqPos) -+{ -+ DEBUGLOG(6, "determine_blockSize : remainingSize = %zu", remaining); -+ if (mode == ZSTD_sf_noBlockDelimiters) { -+ /* Note: more a "target" block size */ -+ return MIN(remaining, blockSize); -+ } -+ assert(mode == ZSTD_sf_explicitBlockDelimiters); -+ { size_t const explicitBlockSize = blockSize_explicitDelimiter(inSeqs, inSeqsSize, seqPos); -+ FORWARD_IF_ERROR(explicitBlockSize, "Error while determining block size with explicit delimiters"); -+ if (explicitBlockSize > blockSize) -+ RETURN_ERROR(externalSequences_invalid, "sequences incorrectly define a too large block"); -+ if (explicitBlockSize > remaining) -+ RETURN_ERROR(externalSequences_invalid, "sequences define a frame longer than source"); -+ return explicitBlockSize; -+ } -+} -+ -+/* Compress all provided sequences, block-by-block. - * - * Returns the cumulative size of all compressed blocks (including their headers), - * otherwise a ZSTD error. 
-@@ -5807,15 +6737,12 @@ ZSTD_compressSequences_internal(ZSTD_CCt - const void* src, size_t srcSize) - { - size_t cSize = 0; -- U32 lastBlock; -- size_t blockSize; -- size_t compressedSeqsSize; - size_t remaining = srcSize; -- ZSTD_sequencePosition seqPos = {0, 0, 0}; -+ ZSTD_SequencePosition seqPos = {0, 0, 0}; - -- BYTE const* ip = (BYTE const*)src; -+ const BYTE* ip = (BYTE const*)src; - BYTE* op = (BYTE*)dst; -- ZSTD_sequenceCopier const sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); -+ ZSTD_SequenceCopier_f const sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); - - DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize); - /* Special case: empty frame */ -@@ -5829,22 +6756,29 @@ ZSTD_compressSequences_internal(ZSTD_CCt - } - - while (remaining) { -+ size_t compressedSeqsSize; - size_t cBlockSize; -- size_t additionalByteAdjustment; -- lastBlock = remaining <= cctx->blockSize; -- blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize; -+ size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters, -+ cctx->blockSizeMax, remaining, -+ inSeqs, inSeqsSize, seqPos); -+ U32 const lastBlock = (blockSize == remaining); -+ FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size"); -+ assert(blockSize <= remaining); - ZSTD_resetSeqStore(&cctx->seqStore); -- DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize); - -- additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize); -- FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy"); -- blockSize -= additionalByteAdjustment; -+ blockSize = sequenceCopier(cctx, -+ &seqPos, inSeqs, inSeqsSize, -+ ip, blockSize, -+ cctx->appliedParams.searchForExternalRepcodes); -+ FORWARD_IF_ERROR(blockSize, "Bad sequence copy"); - - /* If blocks are too small, emit as a nocompress block */ -- if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { -+ /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding -+ * additional 1. 
We need to revisit and change this logic to be more consistent */ -+ if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { - cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); - FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); -- DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize); -+ DEBUGLOG(5, "Block too small (%zu): data remains uncompressed: cSize=%zu", blockSize, cBlockSize); - cSize += cBlockSize; - ip += blockSize; - op += cBlockSize; -@@ -5853,35 +6787,36 @@ ZSTD_compressSequences_internal(ZSTD_CCt - continue; - } - -+ RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block"); - compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore, - &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, - &cctx->appliedParams, - op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize, - blockSize, -- cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, -+ cctx->tmpWorkspace, cctx->tmpWkspSize /* statically allocated in resetCCtx */, - cctx->bmi2); - FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed"); -- DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize); -+ DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize); - - if (!cctx->isFirstBlock && - ZSTD_maybeRLE(&cctx->seqStore) && -- ZSTD_isRLE((BYTE const*)src, srcSize)) { -- /* We don't want to emit our first block as a RLE even if it qualifies because -- * doing so will cause the decoder (cli only) to throw a "should consume all input error." -- * This is only an issue for zstd <= v1.4.3 -- */ -+ ZSTD_isRLE(ip, blockSize)) { -+ /* Note: don't emit the first block as RLE even if it qualifies because -+ * doing so will cause the decoder (cli <= v1.4.3 only) to throw an (invalid) error -+ * "should consume all input error." 
-+ */ - compressedSeqsSize = 1; - } - - if (compressedSeqsSize == 0) { - /* ZSTD_noCompressBlock writes the block header as well */ - cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); -- FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); -- DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize); -+ FORWARD_IF_ERROR(cBlockSize, "ZSTD_noCompressBlock failed"); -+ DEBUGLOG(5, "Writing out nocompress block, size: %zu", cBlockSize); - } else if (compressedSeqsSize == 1) { - cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock); -- FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed"); -- DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize); -+ FORWARD_IF_ERROR(cBlockSize, "ZSTD_rleCompressBlock failed"); -+ DEBUGLOG(5, "Writing out RLE block, size: %zu", cBlockSize); - } else { - U32 cBlockHeader; - /* Error checking and repcodes update */ -@@ -5893,11 +6828,10 @@ ZSTD_compressSequences_internal(ZSTD_CCt - cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3); - MEM_writeLE24(op, cBlockHeader); - cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize; -- DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize); -+ DEBUGLOG(5, "Writing out compressed block, size: %zu", cBlockSize); - } - - cSize += cBlockSize; -- DEBUGLOG(4, "cSize running total: %zu", cSize); - - if (lastBlock) { - break; -@@ -5908,41 +6842,50 @@ ZSTD_compressSequences_internal(ZSTD_CCt - dstCapacity -= cBlockSize; - cctx->isFirstBlock = 0; - } -+ DEBUGLOG(5, "cSize running total: %zu (remaining dstCapacity=%zu)", cSize, dstCapacity); - } - -+ DEBUGLOG(4, "cSize final total: %zu", cSize); - return cSize; - } - --size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity, -+size_t ZSTD_compressSequences(ZSTD_CCtx* cctx, -+ void* dst, size_t dstCapacity, - const ZSTD_Sequence* inSeqs, size_t inSeqsSize, - const void* src, size_t srcSize) - { - BYTE* op = (BYTE*)dst; - size_t cSize = 0; -- size_t compressedBlocksSize = 0; -- size_t frameHeaderSize = 0; - - /* Transparent initialization stage, same as compressStream2() */ -- DEBUGLOG(3, "ZSTD_compressSequences()"); -+ DEBUGLOG(4, "ZSTD_compressSequences (nbSeqs=%zu,dstCapacity=%zu)", inSeqsSize, dstCapacity); - assert(cctx != NULL); - FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed"); -+ - /* Begin writing output, starting with frame header */ -- frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID); -- op += frameHeaderSize; -- dstCapacity -= frameHeaderSize; -- cSize += frameHeaderSize; -+ { size_t const frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, -+ &cctx->appliedParams, srcSize, cctx->dictID); -+ op += frameHeaderSize; -+ assert(frameHeaderSize <= dstCapacity); -+ dstCapacity -= frameHeaderSize; -+ cSize += frameHeaderSize; -+ } - if (cctx->appliedParams.fParams.checksumFlag && srcSize) { - xxh64_update(&cctx->xxhState, src, srcSize); - } -- /* cSize includes block header size and compressed sequences size */ -- compressedBlocksSize = ZSTD_compressSequences_internal(cctx, -+ -+ /* Now generate compressed blocks */ -+ { size_t const cBlocksSize = ZSTD_compressSequences_internal(cctx, - op, dstCapacity, - inSeqs, inSeqsSize, - src, srcSize); -- FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!"); -- cSize += compressedBlocksSize; -- dstCapacity -= compressedBlocksSize; -+ FORWARD_IF_ERROR(cBlocksSize, 
"Compressing blocks failed!"); -+ cSize += cBlocksSize; -+ assert(cBlocksSize <= dstCapacity); -+ dstCapacity -= cBlocksSize; -+ } - -+ /* Complete with frame checksum, if needed */ - if (cctx->appliedParams.fParams.checksumFlag) { - U32 const checksum = (U32) xxh64_digest(&cctx->xxhState); - RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); -@@ -5951,26 +6894,557 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* - cSize += 4; - } - -- DEBUGLOG(3, "Final compressed size: %zu", cSize); -+ DEBUGLOG(4, "Final compressed size: %zu", cSize); -+ return cSize; -+} -+ -+ -+#if defined(__AVX2__) -+ -+#include /* AVX2 intrinsics */ -+ -+/* -+ * Convert 2 sequences per iteration, using AVX2 intrinsics: -+ * - offset -> offBase = offset + 2 -+ * - litLength -> (U16) litLength -+ * - matchLength -> (U16)(matchLength - 3) -+ * - rep is ignored -+ * Store only 8 bytes per SeqDef (offBase[4], litLength[2], mlBase[2]). -+ * -+ * At the end, instead of extracting two __m128i, -+ * we use _mm256_permute4x64_epi64(..., 0xE8) to move lane2 into lane1, -+ * then store the lower 16 bytes in one go. -+ * -+ * @returns 0 on succes, with no long length detected -+ * @returns > 0 if there is one long length (> 65535), -+ * indicating the position, and type. -+ */ -+static size_t convertSequences_noRepcodes( -+ SeqDef* dstSeqs, -+ const ZSTD_Sequence* inSeqs, -+ size_t nbSequences) -+{ -+ /* -+ * addition: -+ * For each 128-bit half: (offset+2, litLength+0, matchLength-3, rep+0) -+ */ -+ const __m256i addition = _mm256_setr_epi32( -+ ZSTD_REP_NUM, 0, -MINMATCH, 0, /* for sequence i */ -+ ZSTD_REP_NUM, 0, -MINMATCH, 0 /* for sequence i+1 */ -+ ); -+ -+ /* limit: check if there is a long length */ -+ const __m256i limit = _mm256_set1_epi32(65535); -+ -+ /* -+ * shuffle mask for byte-level rearrangement in each 128-bit half: -+ * -+ * Input layout (after addition) per 128-bit half: -+ * [ offset+2 (4 bytes) | litLength (4 bytes) | matchLength (4 bytes) | rep (4 bytes) ] -+ * We only need: -+ * offBase (4 bytes) = offset+2 -+ * litLength (2 bytes) = low 2 bytes of litLength -+ * mlBase (2 bytes) = low 2 bytes of (matchLength) -+ * => Bytes [0..3, 4..5, 8..9], zero the rest. -+ */ -+ const __m256i mask = _mm256_setr_epi8( -+ /* For the lower 128 bits => sequence i */ -+ 0, 1, 2, 3, /* offset+2 */ -+ 4, 5, /* litLength (16 bits) */ -+ 8, 9, /* matchLength (16 bits) */ -+ (BYTE)0x80, (BYTE)0x80, (BYTE)0x80, (BYTE)0x80, -+ (BYTE)0x80, (BYTE)0x80, (BYTE)0x80, (BYTE)0x80, -+ -+ /* For the upper 128 bits => sequence i+1 */ -+ 16,17,18,19, /* offset+2 */ -+ 20,21, /* litLength */ -+ 24,25, /* matchLength */ -+ (BYTE)0x80, (BYTE)0x80, (BYTE)0x80, (BYTE)0x80, -+ (BYTE)0x80, (BYTE)0x80, (BYTE)0x80, (BYTE)0x80 -+ ); -+ -+ /* -+ * Next, we'll use _mm256_permute4x64_epi64(vshf, 0xE8). -+ * Explanation of 0xE8 = 11101000b => [lane0, lane2, lane2, lane3]. -+ * So the lower 128 bits become [lane0, lane2] => combining seq0 and seq1. 
-+ */ -+#define PERM_LANE_0X_E8 0xE8 /* [0,2,2,3] in lane indices */ -+ -+ size_t longLen = 0, i = 0; -+ -+ /* AVX permutation depends on the specific definition of target structures */ -+ ZSTD_STATIC_ASSERT(sizeof(ZSTD_Sequence) == 16); -+ ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, offset) == 0); -+ ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, litLength) == 4); -+ ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, matchLength) == 8); -+ ZSTD_STATIC_ASSERT(sizeof(SeqDef) == 8); -+ ZSTD_STATIC_ASSERT(offsetof(SeqDef, offBase) == 0); -+ ZSTD_STATIC_ASSERT(offsetof(SeqDef, litLength) == 4); -+ ZSTD_STATIC_ASSERT(offsetof(SeqDef, mlBase) == 6); -+ -+ /* Process 2 sequences per loop iteration */ -+ for (; i + 1 < nbSequences; i += 2) { -+ /* Load 2 ZSTD_Sequence (32 bytes) */ -+ __m256i vin = _mm256_loadu_si256((const __m256i*)(const void*)&inSeqs[i]); -+ -+ /* Add {2, 0, -3, 0} in each 128-bit half */ -+ __m256i vadd = _mm256_add_epi32(vin, addition); -+ -+ /* Check for long length */ -+ __m256i ll_cmp = _mm256_cmpgt_epi32(vadd, limit); /* 0xFFFFFFFF for element > 65535 */ -+ int ll_res = _mm256_movemask_epi8(ll_cmp); -+ -+ /* Shuffle bytes so each half gives us the 8 bytes we need */ -+ __m256i vshf = _mm256_shuffle_epi8(vadd, mask); -+ /* -+ * Now: -+ * Lane0 = seq0's 8 bytes -+ * Lane1 = 0 -+ * Lane2 = seq1's 8 bytes -+ * Lane3 = 0 -+ */ -+ -+ /* Permute 64-bit lanes => move Lane2 down into Lane1. */ -+ __m256i vperm = _mm256_permute4x64_epi64(vshf, PERM_LANE_0X_E8); -+ /* -+ * Now the lower 16 bytes (Lane0+Lane1) = [seq0, seq1]. -+ * The upper 16 bytes are [Lane2, Lane3] = [seq1, 0], but we won't use them. -+ */ -+ -+ /* Store only the lower 16 bytes => 2 SeqDef (8 bytes each) */ -+ _mm_storeu_si128((__m128i *)(void*)&dstSeqs[i], _mm256_castsi256_si128(vperm)); -+ /* -+ * This writes out 16 bytes total: -+ * - offset 0..7 => seq0 (offBase, litLength, mlBase) -+ * - offset 8..15 => seq1 (offBase, litLength, mlBase) -+ */ -+ -+ /* check (unlikely) long lengths > 65535 -+ * indices for lengths correspond to bits [4..7], [8..11], [20..23], [24..27] -+ * => combined mask = 0x0FF00FF0 -+ */ -+ if (UNLIKELY((ll_res & 0x0FF00FF0) != 0)) { -+ /* long length detected: let's figure out which one*/ -+ if (inSeqs[i].matchLength > 65535+MINMATCH) { -+ assert(longLen == 0); -+ longLen = i + 1; -+ } -+ if (inSeqs[i].litLength > 65535) { -+ assert(longLen == 0); -+ longLen = i + nbSequences + 1; -+ } -+ if (inSeqs[i+1].matchLength > 65535+MINMATCH) { -+ assert(longLen == 0); -+ longLen = i + 1 + 1; -+ } -+ if (inSeqs[i+1].litLength > 65535) { -+ assert(longLen == 0); -+ longLen = i + 1 + nbSequences + 1; -+ } -+ } -+ } -+ -+ /* Handle leftover if @nbSequences is odd */ -+ if (i < nbSequences) { -+ /* process last sequence */ -+ assert(i == nbSequences - 1); -+ dstSeqs[i].offBase = OFFSET_TO_OFFBASE(inSeqs[i].offset); -+ dstSeqs[i].litLength = (U16)inSeqs[i].litLength; -+ dstSeqs[i].mlBase = (U16)(inSeqs[i].matchLength - MINMATCH); -+ /* check (unlikely) long lengths > 65535 */ -+ if (UNLIKELY(inSeqs[i].matchLength > 65535+MINMATCH)) { -+ assert(longLen == 0); -+ longLen = i + 1; -+ } -+ if (UNLIKELY(inSeqs[i].litLength > 65535)) { -+ assert(longLen == 0); -+ longLen = i + nbSequences + 1; -+ } -+ } -+ -+ return longLen; -+} -+ -+/* the vector implementation could also be ported to SSSE3, -+ * but since this implementation is targeting modern systems (>= Sapphire Rapid), -+ * it's not useful to develop and maintain code for older pre-AVX2 platforms */ -+ -+#else /* no AVX2 */ -+ -+static size_t 
convertSequences_noRepcodes( -+ SeqDef* dstSeqs, -+ const ZSTD_Sequence* inSeqs, -+ size_t nbSequences) -+{ -+ size_t longLen = 0; -+ size_t n; -+ for (n=0; n 65535 */ -+ if (UNLIKELY(inSeqs[n].matchLength > 65535+MINMATCH)) { -+ assert(longLen == 0); -+ longLen = n + 1; -+ } -+ if (UNLIKELY(inSeqs[n].litLength > 65535)) { -+ assert(longLen == 0); -+ longLen = n + nbSequences + 1; -+ } -+ } -+ return longLen; -+} -+ -+#endif -+ -+/* -+ * Precondition: Sequences must end on an explicit Block Delimiter -+ * @return: 0 on success, or an error code. -+ * Note: Sequence validation functionality has been disabled (removed). -+ * This is helpful to generate a lean main pipeline, improving performance. -+ * It may be re-inserted later. -+ */ -+size_t ZSTD_convertBlockSequences(ZSTD_CCtx* cctx, -+ const ZSTD_Sequence* const inSeqs, size_t nbSequences, -+ int repcodeResolution) -+{ -+ Repcodes_t updatedRepcodes; -+ size_t seqNb = 0; -+ -+ DEBUGLOG(5, "ZSTD_convertBlockSequences (nbSequences = %zu)", nbSequences); -+ -+ RETURN_ERROR_IF(nbSequences >= cctx->seqStore.maxNbSeq, externalSequences_invalid, -+ "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); -+ -+ ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(Repcodes_t)); -+ -+ /* check end condition */ -+ assert(nbSequences >= 1); -+ assert(inSeqs[nbSequences-1].matchLength == 0); -+ assert(inSeqs[nbSequences-1].offset == 0); -+ -+ /* Convert Sequences from public format to internal format */ -+ if (!repcodeResolution) { -+ size_t const longl = convertSequences_noRepcodes(cctx->seqStore.sequencesStart, inSeqs, nbSequences-1); -+ cctx->seqStore.sequences = cctx->seqStore.sequencesStart + nbSequences-1; -+ if (longl) { -+ DEBUGLOG(5, "long length"); -+ assert(cctx->seqStore.longLengthType == ZSTD_llt_none); -+ if (longl <= nbSequences-1) { -+ DEBUGLOG(5, "long match length detected at pos %zu", longl-1); -+ cctx->seqStore.longLengthType = ZSTD_llt_matchLength; -+ cctx->seqStore.longLengthPos = (U32)(longl-1); -+ } else { -+ DEBUGLOG(5, "long literals length detected at pos %zu", longl-nbSequences); -+ assert(longl <= 2* (nbSequences-1)); -+ cctx->seqStore.longLengthType = ZSTD_llt_literalLength; -+ cctx->seqStore.longLengthPos = (U32)(longl-(nbSequences-1)-1); -+ } -+ } -+ } else { -+ for (seqNb = 0; seqNb < nbSequences - 1 ; seqNb++) { -+ U32 const litLength = inSeqs[seqNb].litLength; -+ U32 const matchLength = inSeqs[seqNb].matchLength; -+ U32 const ll0 = (litLength == 0); -+ U32 const offBase = ZSTD_finalizeOffBase(inSeqs[seqNb].offset, updatedRepcodes.rep, ll0); -+ -+ DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength); -+ ZSTD_storeSeqOnly(&cctx->seqStore, litLength, offBase, matchLength); -+ ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0); -+ } -+ } -+ -+ /* If we skipped repcode search while parsing, we need to update repcodes now */ -+ if (!repcodeResolution && nbSequences > 1) { -+ U32* const rep = updatedRepcodes.rep; -+ -+ if (nbSequences >= 4) { -+ U32 lastSeqIdx = (U32)nbSequences - 2; /* index of last full sequence */ -+ rep[2] = inSeqs[lastSeqIdx - 2].offset; -+ rep[1] = inSeqs[lastSeqIdx - 1].offset; -+ rep[0] = inSeqs[lastSeqIdx].offset; -+ } else if (nbSequences == 3) { -+ rep[2] = rep[0]; -+ rep[1] = inSeqs[0].offset; -+ rep[0] = inSeqs[1].offset; -+ } else { -+ assert(nbSequences == 2); -+ rep[2] = rep[1]; -+ rep[1] = rep[0]; -+ rep[0] = inSeqs[0].offset; -+ } -+ } -+ -+ ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, 
sizeof(Repcodes_t)); -+ -+ return 0; -+} -+ -+#if defined(ZSTD_ARCH_X86_AVX2) -+ -+BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs) -+{ -+ size_t i; -+ __m256i const zeroVec = _mm256_setzero_si256(); -+ __m256i sumVec = zeroVec; /* accumulates match+lit in 32-bit lanes */ -+ ZSTD_ALIGNED(32) U32 tmp[8]; /* temporary buffer for reduction */ -+ size_t mSum = 0, lSum = 0; -+ ZSTD_STATIC_ASSERT(sizeof(ZSTD_Sequence) == 16); -+ -+ /* Process 2 structs (32 bytes) at a time */ -+ for (i = 0; i + 2 <= nbSeqs; i += 2) { -+ /* Load two consecutive ZSTD_Sequence (8×4 = 32 bytes) */ -+ __m256i data = _mm256_loadu_si256((const __m256i*)(const void*)&seqs[i]); -+ /* check end of block signal */ -+ __m256i cmp = _mm256_cmpeq_epi32(data, zeroVec); -+ int cmp_res = _mm256_movemask_epi8(cmp); -+ /* indices for match lengths correspond to bits [8..11], [24..27] -+ * => combined mask = 0x0F000F00 */ -+ ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, matchLength) == 8); -+ if (cmp_res & 0x0F000F00) break; -+ /* Accumulate in sumVec */ -+ sumVec = _mm256_add_epi32(sumVec, data); -+ } -+ -+ /* Horizontal reduction */ -+ _mm256_store_si256((__m256i*)tmp, sumVec); -+ lSum = tmp[1] + tmp[5]; -+ mSum = tmp[2] + tmp[6]; -+ -+ /* Handle the leftover */ -+ for (; i < nbSeqs; i++) { -+ lSum += seqs[i].litLength; -+ mSum += seqs[i].matchLength; -+ if (seqs[i].matchLength == 0) break; /* end of block */ -+ } -+ -+ if (i==nbSeqs) { -+ /* reaching end of sequences: end of block signal was not present */ -+ BlockSummary bs; -+ bs.nbSequences = ERROR(externalSequences_invalid); -+ return bs; -+ } -+ { BlockSummary bs; -+ bs.nbSequences = i+1; -+ bs.blockSize = lSum + mSum; -+ bs.litSize = lSum; -+ return bs; -+ } -+} -+ -+#else -+ -+BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs) -+{ -+ size_t totalMatchSize = 0; -+ size_t litSize = 0; -+ size_t n; -+ assert(seqs); -+ for (n=0; nappliedParams.searchForExternalRepcodes == ZSTD_ps_enable); -+ assert(cctx->appliedParams.searchForExternalRepcodes != ZSTD_ps_auto); -+ -+ DEBUGLOG(4, "ZSTD_compressSequencesAndLiterals_internal: nbSeqs=%zu, litSize=%zu", nbSequences, litSize); -+ RETURN_ERROR_IF(nbSequences == 0, externalSequences_invalid, "Requires at least 1 end-of-block"); -+ -+ /* Special case: empty frame */ -+ if ((nbSequences == 1) && (inSeqs[0].litLength == 0)) { -+ U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1); -+ RETURN_ERROR_IF(dstCapacity<3, dstSize_tooSmall, "No room for empty frame block header"); -+ MEM_writeLE24(op, cBlockHeader24); -+ op += ZSTD_blockHeaderSize; -+ dstCapacity -= ZSTD_blockHeaderSize; -+ cSize += ZSTD_blockHeaderSize; -+ } -+ -+ while (nbSequences) { -+ size_t compressedSeqsSize, cBlockSize, conversionStatus; -+ BlockSummary const block = ZSTD_get1BlockSummary(inSeqs, nbSequences); -+ U32 const lastBlock = (block.nbSequences == nbSequences); -+ FORWARD_IF_ERROR(block.nbSequences, "Error while trying to determine nb of sequences for a block"); -+ assert(block.nbSequences <= nbSequences); -+ RETURN_ERROR_IF(block.litSize > litSize, externalSequences_invalid, "discrepancy: Sequences require more literals than present in buffer"); -+ ZSTD_resetSeqStore(&cctx->seqStore); -+ -+ conversionStatus = ZSTD_convertBlockSequences(cctx, -+ inSeqs, block.nbSequences, -+ repcodeResolution); -+ FORWARD_IF_ERROR(conversionStatus, "Bad sequence conversion"); -+ inSeqs += block.nbSequences; -+ nbSequences -= block.nbSequences; -+ remaining -= block.blockSize; -+ -+ /* Note: when blockSize is very 
small, other variant send it uncompressed. -+ * Here, we still send the sequences, because we don't have the original source to send it uncompressed. -+ * One could imagine in theory reproducing the source from the sequences, -+ * but that's complex and costly memory intensive, and goes against the objectives of this variant. */ -+ -+ RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block"); -+ -+ compressedSeqsSize = ZSTD_entropyCompressSeqStore_internal( -+ op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize, -+ literals, block.litSize, -+ &cctx->seqStore, -+ &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, -+ &cctx->appliedParams, -+ cctx->tmpWorkspace, cctx->tmpWkspSize /* statically allocated in resetCCtx */, -+ cctx->bmi2); -+ FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed"); -+ /* note: the spec forbids for any compressed block to be larger than maximum block size */ -+ if (compressedSeqsSize > cctx->blockSizeMax) compressedSeqsSize = 0; -+ DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize); -+ litSize -= block.litSize; -+ literals = (const char*)literals + block.litSize; -+ -+ /* Note: difficult to check source for RLE block when only Literals are provided, -+ * but it could be considered from analyzing the sequence directly */ -+ -+ if (compressedSeqsSize == 0) { -+ /* Sending uncompressed blocks is out of reach, because the source is not provided. -+ * In theory, one could use the sequences to regenerate the source, like a decompressor, -+ * but it's complex, and memory hungry, killing the purpose of this variant. -+ * Current outcome: generate an error code. 
-+ */ -+ RETURN_ERROR(cannotProduce_uncompressedBlock, "ZSTD_compressSequencesAndLiterals cannot generate an uncompressed block"); -+ } else { -+ U32 cBlockHeader; -+ assert(compressedSeqsSize > 1); /* no RLE */ -+ /* Error checking and repcodes update */ -+ ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState); -+ if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) -+ cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; -+ -+ /* Write block header into beginning of block*/ -+ cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3); -+ MEM_writeLE24(op, cBlockHeader); -+ cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize; -+ DEBUGLOG(5, "Writing out compressed block, size: %zu", cBlockSize); -+ } -+ -+ cSize += cBlockSize; -+ op += cBlockSize; -+ dstCapacity -= cBlockSize; -+ cctx->isFirstBlock = 0; -+ DEBUGLOG(5, "cSize running total: %zu (remaining dstCapacity=%zu)", cSize, dstCapacity); -+ -+ if (lastBlock) { -+ assert(nbSequences == 0); -+ break; -+ } -+ } -+ -+ RETURN_ERROR_IF(litSize != 0, externalSequences_invalid, "literals must be entirely and exactly consumed"); -+ RETURN_ERROR_IF(remaining != 0, externalSequences_invalid, "Sequences must represent a total of exactly srcSize=%zu", srcSize); -+ DEBUGLOG(4, "cSize final total: %zu", cSize); -+ return cSize; -+} -+ -+size_t -+ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx, -+ void* dst, size_t dstCapacity, -+ const ZSTD_Sequence* inSeqs, size_t inSeqsSize, -+ const void* literals, size_t litSize, size_t litCapacity, -+ size_t decompressedSize) -+{ -+ BYTE* op = (BYTE*)dst; -+ size_t cSize = 0; -+ -+ /* Transparent initialization stage, same as compressStream2() */ -+ DEBUGLOG(4, "ZSTD_compressSequencesAndLiterals (dstCapacity=%zu)", dstCapacity); -+ assert(cctx != NULL); -+ if (litCapacity < litSize) { -+ RETURN_ERROR(workSpace_tooSmall, "literals buffer is not large enough: must be at least 8 bytes larger than litSize (risk of read out-of-bound)"); -+ } -+ FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, decompressedSize), "CCtx initialization failed"); -+ -+ if (cctx->appliedParams.blockDelimiters == ZSTD_sf_noBlockDelimiters) { -+ RETURN_ERROR(frameParameter_unsupported, "This mode is only compatible with explicit delimiters"); -+ } -+ if (cctx->appliedParams.validateSequences) { -+ RETURN_ERROR(parameter_unsupported, "This mode is not compatible with Sequence validation"); -+ } -+ if (cctx->appliedParams.fParams.checksumFlag) { -+ RETURN_ERROR(frameParameter_unsupported, "this mode is not compatible with frame checksum"); -+ } -+ -+ /* Begin writing output, starting with frame header */ -+ { size_t const frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, -+ &cctx->appliedParams, decompressedSize, cctx->dictID); -+ op += frameHeaderSize; -+ assert(frameHeaderSize <= dstCapacity); -+ dstCapacity -= frameHeaderSize; -+ cSize += frameHeaderSize; -+ } -+ -+ /* Now generate compressed blocks */ -+ { size_t const cBlocksSize = ZSTD_compressSequencesAndLiterals_internal(cctx, -+ op, dstCapacity, -+ inSeqs, inSeqsSize, -+ literals, litSize, decompressedSize); -+ FORWARD_IF_ERROR(cBlocksSize, "Compressing blocks failed!"); -+ cSize += cBlocksSize; -+ assert(cBlocksSize <= dstCapacity); -+ dstCapacity -= cBlocksSize; -+ } -+ -+ DEBUGLOG(4, "Final compressed size: %zu", cSize); - return cSize; - } - - /*====== Finalize ======*/ - -+static ZSTD_inBuffer inBuffer_forEndFlush(const ZSTD_CStream* zcs) -+{ -+ const 
ZSTD_inBuffer nullInput = { NULL, 0, 0 }; -+ const int stableInput = (zcs->appliedParams.inBufferMode == ZSTD_bm_stable); -+ return stableInput ? zcs->expectedInBuffer : nullInput; -+} -+ - /*! ZSTD_flushStream() : - * @return : amount of data remaining to flush */ - size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) - { -- ZSTD_inBuffer input = { NULL, 0, 0 }; -+ ZSTD_inBuffer input = inBuffer_forEndFlush(zcs); -+ input.size = input.pos; /* do not ingest more input during flush */ - return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush); - } - -- - size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) - { -- ZSTD_inBuffer input = { NULL, 0, 0 }; -+ ZSTD_inBuffer input = inBuffer_forEndFlush(zcs); - size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); -- FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed"); -+ FORWARD_IF_ERROR(remainingToFlush , "ZSTD_compressStream2(,,ZSTD_e_end) failed"); - if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ - /* single thread mode : attempt to calculate remaining to flush more precisely */ - { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; -@@ -6046,7 +7520,7 @@ static void ZSTD_dedicatedDictSearch_rev - } - } - --static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) -+static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode) - { - switch (mode) { - case ZSTD_cpm_unknown: -@@ -6070,8 +7544,8 @@ static U64 ZSTD_getCParamRowSize(U64 src - * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. - * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. - * Use dictSize == 0 for unknown or unused. -- * Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */ --static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) -+ * Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_CParamMode_e`. */ -+static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode) - { - U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode); - U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); -@@ -6092,7 +7566,7 @@ static ZSTD_compressionParameters ZSTD_g - cp.targetLength = (unsigned)(-clampedCompressionLevel); - } - /* refine parameters based on srcSize & dictSize */ -- return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode); -+ return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode, ZSTD_ps_auto); - } - } - -@@ -6109,7 +7583,9 @@ ZSTD_compressionParameters ZSTD_getCPara - * same idea as ZSTD_getCParams() - * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). 
- * Fields of `ZSTD_frameParameters` are set to default values */ --static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) { -+static ZSTD_parameters -+ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode) -+{ - ZSTD_parameters params; - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode); - DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); -@@ -6123,7 +7599,34 @@ static ZSTD_parameters ZSTD_getParams_in - * same idea as ZSTD_getCParams() - * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). - * Fields of `ZSTD_frameParameters` are set to default values */ --ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { -+ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) -+{ - if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); - } -+ -+void ZSTD_registerSequenceProducer( -+ ZSTD_CCtx* zc, -+ void* extSeqProdState, -+ ZSTD_sequenceProducer_F extSeqProdFunc) -+{ -+ assert(zc != NULL); -+ ZSTD_CCtxParams_registerSequenceProducer( -+ &zc->requestedParams, extSeqProdState, extSeqProdFunc -+ ); -+} -+ -+void ZSTD_CCtxParams_registerSequenceProducer( -+ ZSTD_CCtx_params* params, -+ void* extSeqProdState, -+ ZSTD_sequenceProducer_F extSeqProdFunc) -+{ -+ assert(params != NULL); -+ if (extSeqProdFunc != NULL) { -+ params->extSeqProdFunc = extSeqProdFunc; -+ params->extSeqProdState = extSeqProdState; -+ } else { -+ params->extSeqProdFunc = NULL; -+ params->extSeqProdState = NULL; -+ } -+} ---- a/lib/zstd/compress/zstd_compress_internal.h -+++ b/lib/zstd/compress/zstd_compress_internal.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -20,7 +21,8 @@ - ***************************************/ - #include "../common/zstd_internal.h" - #include "zstd_cwksp.h" -- -+#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */ -+#include "zstd_preSplit.h" /* ZSTD_SLIPBLOCK_WORKSPACESIZE */ - - /*-************************************* - * Constants -@@ -32,7 +34,7 @@ - It's not a big deal though : candidate will just be sorted again. - Additionally, candidate position 1 will be lost. - But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. -- The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. -+ The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy. 
- This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ - - -@@ -76,6 +78,70 @@ typedef struct { - } ZSTD_entropyCTables_t; - - /* ********************************************* -+* Sequences * -+***********************************************/ -+typedef struct SeqDef_s { -+ U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */ -+ U16 litLength; -+ U16 mlBase; /* mlBase == matchLength - MINMATCH */ -+} SeqDef; -+ -+/* Controls whether seqStore has a single "long" litLength or matchLength. See SeqStore_t. */ -+typedef enum { -+ ZSTD_llt_none = 0, /* no longLengthType */ -+ ZSTD_llt_literalLength = 1, /* represents a long literal */ -+ ZSTD_llt_matchLength = 2 /* represents a long match */ -+} ZSTD_longLengthType_e; -+ -+typedef struct { -+ SeqDef* sequencesStart; -+ SeqDef* sequences; /* ptr to end of sequences */ -+ BYTE* litStart; -+ BYTE* lit; /* ptr to end of literals */ -+ BYTE* llCode; -+ BYTE* mlCode; -+ BYTE* ofCode; -+ size_t maxNbSeq; -+ size_t maxNbLit; -+ -+ /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength -+ * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment -+ * the existing value of the litLength or matchLength by 0x10000. -+ */ -+ ZSTD_longLengthType_e longLengthType; -+ U32 longLengthPos; /* Index of the sequence to apply long length modification to */ -+} SeqStore_t; -+ -+typedef struct { -+ U32 litLength; -+ U32 matchLength; -+} ZSTD_SequenceLength; -+ -+/* -+ * Returns the ZSTD_SequenceLength for the given sequences. It handles the decoding of long sequences -+ * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. -+ */ -+MEM_STATIC ZSTD_SequenceLength ZSTD_getSequenceLength(SeqStore_t const* seqStore, SeqDef const* seq) -+{ -+ ZSTD_SequenceLength seqLen; -+ seqLen.litLength = seq->litLength; -+ seqLen.matchLength = seq->mlBase + MINMATCH; -+ if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { -+ if (seqStore->longLengthType == ZSTD_llt_literalLength) { -+ seqLen.litLength += 0x10000; -+ } -+ if (seqStore->longLengthType == ZSTD_llt_matchLength) { -+ seqLen.matchLength += 0x10000; -+ } -+ } -+ return seqLen; -+} -+ -+const SeqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ -+int ZSTD_seqToCodes(const SeqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ -+ -+ -+/* ********************************************* - * Entropy buffer statistics structs and funcs * - ***********************************************/ - /* ZSTD_hufCTablesMetadata_t : -@@ -84,7 +150,7 @@ typedef struct { - * hufDesSize refers to the size of huffman tree description in bytes. - * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */ - typedef struct { -- symbolEncodingType_e hType; -+ SymbolEncodingType_e hType; - BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; - size_t hufDesSize; - } ZSTD_hufCTablesMetadata_t; -@@ -95,9 +161,9 @@ typedef struct { - * fseTablesSize refers to the size of fse tables in bytes. 
- * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */ - typedef struct { -- symbolEncodingType_e llType; -- symbolEncodingType_e ofType; -- symbolEncodingType_e mlType; -+ SymbolEncodingType_e llType; -+ SymbolEncodingType_e ofType; -+ SymbolEncodingType_e mlType; - BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; - size_t fseTablesSize; - size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ -@@ -111,12 +177,13 @@ typedef struct { - /* ZSTD_buildBlockEntropyStats() : - * Builds entropy for the block. - * @return : 0 on success or error code */ --size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, -- const ZSTD_entropyCTables_t* prevEntropy, -- ZSTD_entropyCTables_t* nextEntropy, -- const ZSTD_CCtx_params* cctxParams, -- ZSTD_entropyCTablesMetadata_t* entropyMetadata, -- void* workspace, size_t wkspSize); -+size_t ZSTD_buildBlockEntropyStats( -+ const SeqStore_t* seqStorePtr, -+ const ZSTD_entropyCTables_t* prevEntropy, -+ ZSTD_entropyCTables_t* nextEntropy, -+ const ZSTD_CCtx_params* cctxParams, -+ ZSTD_entropyCTablesMetadata_t* entropyMetadata, -+ void* workspace, size_t wkspSize); - - /* ******************************* - * Compression internals structs * -@@ -140,28 +207,29 @@ typedef struct { - stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */ - size_t size; /* The number of sequences. <= capacity. */ - size_t capacity; /* The capacity starting from `seq` pointer */ --} rawSeqStore_t; -+} RawSeqStore_t; - --UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; -+UNUSED_ATTR static const RawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; - - typedef struct { -- int price; -- U32 off; -- U32 mlen; -- U32 litlen; -- U32 rep[ZSTD_REP_NUM]; -+ int price; /* price from beginning of segment to this position */ -+ U32 off; /* offset of previous match */ -+ U32 mlen; /* length of previous match */ -+ U32 litlen; /* nb of literals since previous match */ -+ U32 rep[ZSTD_REP_NUM]; /* offset history after previous match */ - } ZSTD_optimal_t; - - typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; - -+#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3) - typedef struct { - /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ - unsigned* litFreq; /* table of literals statistics, of size 256 */ - unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ - unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ - unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ -- ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ -- ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ -+ ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_SIZE */ -+ ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */ - - U32 litSum; /* nb of literals */ - U32 litLengthSum; /* nb of litLength codes */ -@@ -173,7 +241,7 @@ typedef struct { - U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ - ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ - const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ -- ZSTD_paramSwitch_e literalCompressionMode; -+ ZSTD_ParamSwitch_e literalCompressionMode; - } optState_t; - - typedef struct { -@@ -195,11 +263,11 @@ typedef struct { - 
- #define ZSTD_WINDOW_START_INDEX 2 - --typedef struct ZSTD_matchState_t ZSTD_matchState_t; -+typedef struct ZSTD_MatchState_t ZSTD_MatchState_t; - - #define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */ - --struct ZSTD_matchState_t { -+struct ZSTD_MatchState_t { - ZSTD_window_t window; /* State for window round buffer management */ - U32 loadedDictEnd; /* index of end of dictionary, within context's referential. - * When loadedDictEnd != 0, a dictionary is in use, and still valid. -@@ -212,28 +280,42 @@ struct ZSTD_matchState_t { - U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ - - U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/ -- U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ -+ BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ - U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */ -+ U64 hashSalt; /* For row-based matchFinder: salts the hash for reuse of tag table */ -+ U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */ - - U32* hashTable; - U32* hashTable3; - U32* chainTable; - -- U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */ -+ int forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */ - - int dedicatedDictSearch; /* Indicates whether this matchState is using the - * dedicated dictionary search structure. - */ - optState_t opt; /* optimal parser state */ -- const ZSTD_matchState_t* dictMatchState; -+ const ZSTD_MatchState_t* dictMatchState; - ZSTD_compressionParameters cParams; -- const rawSeqStore_t* ldmSeqStore; -+ const RawSeqStore_t* ldmSeqStore; -+ -+ /* Controls prefetching in some dictMatchState matchfinders. -+ * This behavior is controlled from the cctx ms. -+ * This parameter has no effect in the cdict ms. */ -+ int prefetchCDictTables; -+ -+ /* When == 0, lazy match finders insert every position. -+ * When != 0, lazy match finders only insert positions they search. -+ * This allows them to skip much faster over incompressible data, -+ * at a small cost to compression ratio. -+ */ -+ int lazySkipping; - }; - - typedef struct { - ZSTD_compressedBlockState_t* prevCBlock; - ZSTD_compressedBlockState_t* nextCBlock; -- ZSTD_matchState_t matchState; -+ ZSTD_MatchState_t matchState; - } ZSTD_blockState_t; - - typedef struct { -@@ -260,7 +342,7 @@ typedef struct { - } ldmState_t; - - typedef struct { -- ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */ -+ ZSTD_ParamSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. 
ZSTD_ps_auto by default */ - U32 hashLog; /* Log size of hashTable */ - U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ - U32 minMatchLength; /* Minimum match length */ -@@ -291,7 +373,7 @@ struct ZSTD_CCtx_params_s { - * There is no guarantee that hint is close to actual source size */ - - ZSTD_dictAttachPref_e attachDictPref; -- ZSTD_paramSwitch_e literalCompressionMode; -+ ZSTD_ParamSwitch_e literalCompressionMode; - - /* Multithreading: used to pass parameters to mtctx */ - int nbWorkers; -@@ -310,24 +392,54 @@ struct ZSTD_CCtx_params_s { - ZSTD_bufferMode_e outBufferMode; - - /* Sequence compression API */ -- ZSTD_sequenceFormat_e blockDelimiters; -+ ZSTD_SequenceFormat_e blockDelimiters; - int validateSequences; - -- /* Block splitting */ -- ZSTD_paramSwitch_e useBlockSplitter; -+ /* Block splitting -+ * @postBlockSplitter executes split analysis after sequences are produced, -+ * it's more accurate but consumes more resources. -+ * @preBlockSplitter_level splits before knowing sequences, -+ * it's more approximative but also cheaper. -+ * Valid @preBlockSplitter_level values range from 0 to 6 (included). -+ * 0 means auto, 1 means do not split, -+ * then levels are sorted in increasing cpu budget, from 2 (fastest) to 6 (slowest). -+ * Highest @preBlockSplitter_level combines well with @postBlockSplitter. -+ */ -+ ZSTD_ParamSwitch_e postBlockSplitter; -+ int preBlockSplitter_level; -+ -+ /* Adjust the max block size*/ -+ size_t maxBlockSize; - - /* Param for deciding whether to use row-based matchfinder */ -- ZSTD_paramSwitch_e useRowMatchFinder; -+ ZSTD_ParamSwitch_e useRowMatchFinder; - - /* Always load a dictionary in ext-dict mode (not prefix mode)? */ - int deterministicRefPrefix; - - /* Internal use, for createCCtxParams() and freeCCtxParams() only */ - ZSTD_customMem customMem; -+ -+ /* Controls prefetching in some dictMatchState matchfinders */ -+ ZSTD_ParamSwitch_e prefetchCDictTables; -+ -+ /* Controls whether zstd will fall back to an internal matchfinder -+ * if the external matchfinder returns an error code. */ -+ int enableMatchFinderFallback; -+ -+ /* Parameters for the external sequence producer API. -+ * Users set these parameters through ZSTD_registerSequenceProducer(). -+ * It is not possible to set these parameters individually through the public API. 
*/ -+ void* extSeqProdState; -+ ZSTD_sequenceProducer_F extSeqProdFunc; -+ -+ /* Controls repcode search in external sequence parsing */ -+ ZSTD_ParamSwitch_e searchForExternalRepcodes; - }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ - - #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) - #define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE) -+#define TMP_WORKSPACE_SIZE (MAX(ENTROPY_WORKSPACE_SIZE, ZSTD_SLIPBLOCK_WORKSPACESIZE)) - - /* - * Indicates whether this compression proceeds directly from user-provided -@@ -345,11 +457,11 @@ typedef enum { - */ - #define ZSTD_MAX_NB_BLOCK_SPLITS 196 - typedef struct { -- seqStore_t fullSeqStoreChunk; -- seqStore_t firstHalfSeqStore; -- seqStore_t secondHalfSeqStore; -- seqStore_t currSeqStore; -- seqStore_t nextSeqStore; -+ SeqStore_t fullSeqStoreChunk; -+ SeqStore_t firstHalfSeqStore; -+ SeqStore_t secondHalfSeqStore; -+ SeqStore_t currSeqStore; -+ SeqStore_t nextSeqStore; - - U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS]; - ZSTD_entropyCTablesMetadata_t entropyMetadata; -@@ -366,7 +478,7 @@ struct ZSTD_CCtx_s { - size_t dictContentSize; - - ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */ -- size_t blockSize; -+ size_t blockSizeMax; - unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */ - unsigned long long consumedSrcSize; - unsigned long long producedCSize; -@@ -378,13 +490,14 @@ struct ZSTD_CCtx_s { - int isFirstBlock; - int initialized; - -- seqStore_t seqStore; /* sequences storage ptrs */ -+ SeqStore_t seqStore; /* sequences storage ptrs */ - ldmState_t ldmState; /* long distance matching state */ - rawSeq* ldmSequences; /* Storage for the ldm output sequences */ - size_t maxNbLdmSequences; -- rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */ -+ RawSeqStore_t externSeqStore; /* Mutable reference to external sequences */ - ZSTD_blockState_t blockState; -- U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */ -+ void* tmpWorkspace; /* used as substitute of stack space - must be aligned for S64 type */ -+ size_t tmpWkspSize; - - /* Whether we are streaming or not */ - ZSTD_buffered_policy_e bufferedPolicy; -@@ -404,6 +517,7 @@ struct ZSTD_CCtx_s { - - /* Stable in/out buffer verification */ - ZSTD_inBuffer expectedInBuffer; -+ size_t stableIn_notConsumed; /* nb bytes within stable input buffer that are said to be consumed but are not */ - size_t expectedOutBufferSize; - - /* Dictionary */ -@@ -417,9 +531,14 @@ struct ZSTD_CCtx_s { - - /* Workspace for block splitter */ - ZSTD_blockSplitCtx blockSplitCtx; -+ -+ /* Buffer for output from external sequence producer */ -+ ZSTD_Sequence* extSeqBuf; -+ size_t extSeqBufCapacity; - }; - - typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; -+typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e; - - typedef enum { - ZSTD_noDict = 0, -@@ -441,17 +560,17 @@ typedef enum { - * In this mode we take both the source size and the dictionary size - * into account when selecting and adjusting the parameters. - */ -- ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams. -+ ZSTD_cpm_unknown = 3 /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams. - * We don't know what these parameters are for. We default to the legacy - * behavior of taking both the source size and the dict size into account - * when selecting and adjusting parameters. 
- */ --} ZSTD_cParamMode_e; -+} ZSTD_CParamMode_e; - --typedef size_t (*ZSTD_blockCompressor) ( -- ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+typedef size_t (*ZSTD_BlockCompressor_f) ( -+ ZSTD_MatchState_t* bs, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode); -+ZSTD_BlockCompressor_f ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_ParamSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode); - - - MEM_STATIC U32 ZSTD_LLcode(U32 litLength) -@@ -497,12 +616,33 @@ MEM_STATIC int ZSTD_cParam_withinBounds( - return 1; - } - -+/* ZSTD_selectAddr: -+ * @return index >= lowLimit ? candidate : backup, -+ * tries to force branchless codegen. */ -+MEM_STATIC const BYTE* -+ZSTD_selectAddr(U32 index, U32 lowLimit, const BYTE* candidate, const BYTE* backup) -+{ -+#if defined(__x86_64__) -+ __asm__ ( -+ "cmp %1, %2\n" -+ "cmova %3, %0\n" -+ : "+r"(candidate) -+ : "r"(index), "r"(lowLimit), "r"(backup) -+ ); -+ return candidate; -+#else -+ return index >= lowLimit ? candidate : backup; -+#endif -+} -+ - /* ZSTD_noCompressBlock() : - * Writes uncompressed block to dst buffer from given src. - * Returns the size of the block */ --MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) -+MEM_STATIC size_t -+ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) - { - U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); -+ DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity); - RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, - dstSize_tooSmall, "dst buf too small for uncompressed block"); - MEM_writeLE24(dst, cBlockHeader24); -@@ -510,7 +650,8 @@ MEM_STATIC size_t ZSTD_noCompressBlock ( - return ZSTD_blockHeaderSize + srcSize; - } - --MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) -+MEM_STATIC size_t -+ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) - { - BYTE* const op = (BYTE*)dst; - U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); -@@ -529,7 +670,7 @@ MEM_STATIC size_t ZSTD_minGain(size_t sr - { - U32 const minlog = (strat>=ZSTD_btultra) ? 
(U32)(strat) - 1 : 6; - ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); -- assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); -+ assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat)); - return (srcSize >> minlog) + 2; - } - -@@ -565,29 +706,68 @@ ZSTD_safecopyLiterals(BYTE* op, BYTE con - while (ip < iend) *op++ = *ip++; - } - --#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) --#define STORE_REPCODE_1 STORE_REPCODE(1) --#define STORE_REPCODE_2 STORE_REPCODE(2) --#define STORE_REPCODE_3 STORE_REPCODE(3) --#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1) --#define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE) --#define STORED_IS_OFFSET(o) ((o) > ZSTD_REP_MOVE) --#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE) --#define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE) --#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */ --#define STORED_TO_OFFBASE(o) ((o)+1) --#define OFFBASE_TO_STORED(o) ((o)-1) -+ -+#define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1) -+#define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2) -+#define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3) -+#define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */ -+#define OFFSET_TO_OFFBASE(o) (assert((o)>0), o + ZSTD_REP_NUM) -+#define OFFBASE_IS_OFFSET(o) ((o) > ZSTD_REP_NUM) -+#define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM) -+#define OFFBASE_TO_OFFSET(o) (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM) -+#define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o)) /* returns ID 1,2,3 */ -+ -+/*! ZSTD_storeSeqOnly() : -+ * Store a sequence (litlen, litPtr, offBase and matchLength) into SeqStore_t. -+ * Literals themselves are not copied, but @litPtr is updated. -+ * @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE(). -+ * @matchLength : must be >= MINMATCH -+*/ -+HINT_INLINE UNUSED_ATTR void -+ZSTD_storeSeqOnly(SeqStore_t* seqStorePtr, -+ size_t litLength, -+ U32 offBase, -+ size_t matchLength) -+{ -+ assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); -+ -+ /* literal Length */ -+ assert(litLength <= ZSTD_BLOCKSIZE_MAX); -+ if (UNLIKELY(litLength>0xFFFF)) { -+ assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ -+ seqStorePtr->longLengthType = ZSTD_llt_literalLength; -+ seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); -+ } -+ seqStorePtr->sequences[0].litLength = (U16)litLength; -+ -+ /* match offset */ -+ seqStorePtr->sequences[0].offBase = offBase; -+ -+ /* match Length */ -+ assert(matchLength <= ZSTD_BLOCKSIZE_MAX); -+ assert(matchLength >= MINMATCH); -+ { size_t const mlBase = matchLength - MINMATCH; -+ if (UNLIKELY(mlBase>0xFFFF)) { -+ assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ -+ seqStorePtr->longLengthType = ZSTD_llt_matchLength; -+ seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); -+ } -+ seqStorePtr->sequences[0].mlBase = (U16)mlBase; -+ } -+ -+ seqStorePtr->sequences++; -+} - - /*! ZSTD_storeSeq() : -- * Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t. -- * @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET(). -+ * Store a sequence (litlen, litPtr, offBase and matchLength) into SeqStore_t. -+ * @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE(). 
- * @matchLength : must be >= MINMATCH -- * Allowed to overread literals up to litLimit. -+ * Allowed to over-read literals up to litLimit. - */ - HINT_INLINE UNUSED_ATTR void --ZSTD_storeSeq(seqStore_t* seqStorePtr, -+ZSTD_storeSeq(SeqStore_t* seqStorePtr, - size_t litLength, const BYTE* literals, const BYTE* litLimit, -- U32 offBase_minus1, -+ U32 offBase, - size_t matchLength) - { - BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; -@@ -596,8 +776,8 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr, - static const BYTE* g_start = NULL; - if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ - { U32 const pos = (U32)((const BYTE*)literals - g_start); -- DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", -- pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1); -+ DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u", -+ pos, (U32)litLength, (U32)matchLength, (U32)offBase); - } - #endif - assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); -@@ -607,9 +787,9 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr, - assert(literals + litLength <= litLimit); - if (litEnd <= litLimit_w) { - /* Common case we can use wildcopy. -- * First copy 16 bytes, because literals are likely short. -- */ -- assert(WILDCOPY_OVERLENGTH >= 16); -+ * First copy 16 bytes, because literals are likely short. -+ */ -+ ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16); - ZSTD_copy16(seqStorePtr->lit, literals); - if (litLength > 16) { - ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); -@@ -619,44 +799,22 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr, - } - seqStorePtr->lit += litLength; - -- /* literal Length */ -- if (litLength>0xFFFF) { -- assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ -- seqStorePtr->longLengthType = ZSTD_llt_literalLength; -- seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); -- } -- seqStorePtr->sequences[0].litLength = (U16)litLength; -- -- /* match offset */ -- seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1); -- -- /* match Length */ -- assert(matchLength >= MINMATCH); -- { size_t const mlBase = matchLength - MINMATCH; -- if (mlBase>0xFFFF) { -- assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ -- seqStorePtr->longLengthType = ZSTD_llt_matchLength; -- seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); -- } -- seqStorePtr->sequences[0].mlBase = (U16)mlBase; -- } -- -- seqStorePtr->sequences++; -+ ZSTD_storeSeqOnly(seqStorePtr, litLength, offBase, matchLength); - } - - /* ZSTD_updateRep() : - * updates in-place @rep (array of repeat offsets) -- * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq() -+ * @offBase : sum-type, using numeric representation of ZSTD_storeSeq() - */ - MEM_STATIC void --ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) -+ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0) - { -- if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */ -+ if (OFFBASE_IS_OFFSET(offBase)) { /* full offset */ - rep[2] = rep[1]; - rep[1] = rep[0]; -- rep[0] = STORED_OFFSET(offBase_minus1); -+ rep[0] = OFFBASE_TO_OFFSET(offBase); - } else { /* repcode */ -- U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0; -+ U32 const repCode = 
OFFBASE_TO_REPCODE(offBase) - 1 + ll0; - if (repCode > 0) { /* note : if repCode==0, no change */ - U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; - rep[2] = (repCode >= 2) ? rep[1] : rep[2]; -@@ -670,14 +828,14 @@ ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U3 - - typedef struct repcodes_s { - U32 rep[3]; --} repcodes_t; -+} Repcodes_t; - --MEM_STATIC repcodes_t --ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) -+MEM_STATIC Repcodes_t -+ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0) - { -- repcodes_t newReps; -+ Repcodes_t newReps; - ZSTD_memcpy(&newReps, rep, sizeof(newReps)); -- ZSTD_updateRep(newReps.rep, offBase_minus1, ll0); -+ ZSTD_updateRep(newReps.rep, offBase, ll0); - return newReps; - } - -@@ -685,59 +843,6 @@ ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], - /*-************************************* - * Match length counter - ***************************************/ --static unsigned ZSTD_NbCommonBytes (size_t val) --{ -- if (MEM_isLittleEndian()) { -- if (MEM_64bits()) { --# if (__GNUC__ >= 4) -- return (__builtin_ctzll((U64)val) >> 3); --# else -- static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, -- 0, 3, 1, 3, 1, 4, 2, 7, -- 0, 2, 3, 6, 1, 5, 3, 5, -- 1, 3, 4, 4, 2, 5, 6, 7, -- 7, 0, 1, 2, 3, 3, 4, 6, -- 2, 6, 5, 5, 3, 4, 5, 6, -- 7, 1, 2, 4, 6, 4, 4, 5, -- 7, 2, 6, 5, 7, 6, 7, 7 }; -- return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; --# endif -- } else { /* 32 bits */ --# if (__GNUC__ >= 3) -- return (__builtin_ctz((U32)val) >> 3); --# else -- static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, -- 3, 2, 2, 1, 3, 2, 0, 1, -- 3, 3, 1, 2, 2, 2, 2, 0, -- 3, 1, 2, 0, 1, 0, 1, 1 }; -- return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; --# endif -- } -- } else { /* Big Endian CPU */ -- if (MEM_64bits()) { --# if (__GNUC__ >= 4) -- return (__builtin_clzll(val) >> 3); --# else -- unsigned r; -- const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ -- if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } -- if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } -- r += (!val); -- return r; --# endif -- } else { /* 32 bits */ --# if (__GNUC__ >= 3) -- return (__builtin_clz((U32)val) >> 3); --# else -- unsigned r; -- if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } -- r += (!val); -- return r; --# endif -- } } --} -- -- - MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) - { - const BYTE* const pStart = pIn; -@@ -771,8 +876,8 @@ ZSTD_count_2segments(const BYTE* ip, con - size_t const matchLength = ZSTD_count(ip, match, vEnd); - if (match + matchLength != mEnd) return matchLength; - DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength); -- DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match); -- DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip); -+ DEBUGLOG(7, "distance from match beginning to end dictionary = %i", (int)(mEnd - match)); -+ DEBUGLOG(7, "distance from current pos to end buffer = %i", (int)(iEnd - ip)); - DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart); - DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd)); - return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd); -@@ -783,32 +888,43 @@ ZSTD_count_2segments(const BYTE* ip, con - * Hashes 
- ***************************************/ - static const U32 prime3bytes = 506832829U; --static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; } --MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ -+static U32 ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s) >> (32-h) ; } -+MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */ -+MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); } - - static const U32 prime4bytes = 2654435761U; --static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } --static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } -+static U32 ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; } -+static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); } -+static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); } - - static const U64 prime5bytes = 889523592379ULL; --static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } --static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } -+static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; } -+static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); } -+static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); } - - static const U64 prime6bytes = 227718039650203ULL; --static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } --static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } -+static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; } -+static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); } -+static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); } - - static const U64 prime7bytes = 58295818150454627ULL; --static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } --static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } -+static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; } -+static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); } -+static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); } - - static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; --static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } --static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } -+static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes) ^ s) >> (64-h)) ; } -+static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); } -+static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return 
ZSTD_hash8(MEM_readLE64(p), h, s); } -+ - - MEM_STATIC FORCE_INLINE_ATTR - size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) - { -+ /* Although some of these hashes do support hBits up to 64, some do not. -+ * To be on the safe side, always avoid hBits > 32. */ -+ assert(hBits <= 32); -+ - switch(mls) - { - default: -@@ -820,6 +936,24 @@ size_t ZSTD_hashPtr(const void* p, U32 h - } - } - -+MEM_STATIC FORCE_INLINE_ATTR -+size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) { -+ /* Although some of these hashes do support hBits up to 64, some do not. -+ * To be on the safe side, always avoid hBits > 32. */ -+ assert(hBits <= 32); -+ -+ switch(mls) -+ { -+ default: -+ case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt); -+ case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt); -+ case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt); -+ case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt); -+ case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt); -+ } -+} -+ -+ - /* ZSTD_ipow() : - * Return base^exponent. - */ -@@ -881,11 +1015,12 @@ MEM_STATIC U64 ZSTD_rollingHash_rotate(U - /*-************************************* - * Round buffer management - ***************************************/ --#if (ZSTD_WINDOWLOG_MAX_64 > 31) --# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX" --#endif --/* Max current allowed */ --#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) -+/* Max @current value allowed: -+ * In 32-bit mode: we want to avoid crossing the 2 GB limit, -+ * reducing risks of side effects in case of signed operations on indexes. -+ * In 64-bit mode: we want to ensure that adding the maximum job size (512 MB) -+ * doesn't overflow U32 index capacity (4 GB) */ -+#define ZSTD_CURRENT_MAX (MEM_64bits() ? 3500U MB : 2000U MB) - /* Maximum chunk size before overflow correction needs to be called again */ - #define ZSTD_CHUNKSIZE_MAX \ - ( ((U32)-1) /* Maximum ending current index */ \ -@@ -925,7 +1060,7 @@ MEM_STATIC U32 ZSTD_window_hasExtDict(ZS - * Inspects the provided matchState and figures out what dictMode should be - * passed to the compressor. - */ --MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) -+MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_MatchState_t *ms) - { - return ZSTD_window_hasExtDict(ms->window) ? - ZSTD_extDict : -@@ -1011,7 +1146,9 @@ MEM_STATIC U32 ZSTD_window_needOverflowC - * The least significant cycleLog bits of the indices must remain the same, - * which may be 0. Every index up to maxDist in the past must be valid. - */ --MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, -+MEM_STATIC -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, - U32 maxDist, void const* src) - { - /* preemptive overflow correction: -@@ -1112,7 +1249,7 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t - const void* blockEnd, - U32 maxDist, - U32* loadedDictEndPtr, -- const ZSTD_matchState_t** dictMatchStatePtr) -+ const ZSTD_MatchState_t** dictMatchStatePtr) - { - U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); - U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? 
*loadedDictEndPtr : 0; -@@ -1157,7 +1294,7 @@ ZSTD_checkDictValidity(const ZSTD_window - const void* blockEnd, - U32 maxDist, - U32* loadedDictEndPtr, -- const ZSTD_matchState_t** dictMatchStatePtr) -+ const ZSTD_MatchState_t** dictMatchStatePtr) - { - assert(loadedDictEndPtr != NULL); - assert(dictMatchStatePtr != NULL); -@@ -1167,10 +1304,15 @@ ZSTD_checkDictValidity(const ZSTD_window - (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); - assert(blockEndIdx >= loadedDictEnd); - -- if (blockEndIdx > loadedDictEnd + maxDist) { -+ if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) { - /* On reaching window size, dictionaries are invalidated. - * For simplification, if window size is reached anywhere within next block, - * the dictionary is invalidated for the full block. -+ * -+ * We also have to invalidate the dictionary if ZSTD_window_update() has detected -+ * non-contiguous segments, which means that loadedDictEnd != window->dictLimit. -+ * loadedDictEnd may be 0, if forceWindow is true, but in that case we never use -+ * dictMatchState, so setting it to NULL is not a problem. - */ - DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); - *loadedDictEndPtr = 0; -@@ -1199,9 +1341,11 @@ MEM_STATIC void ZSTD_window_init(ZSTD_wi - * forget about the extDict. Handles overlap of the prefix and extDict. - * Returns non-zero if the segment is contiguous. - */ --MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, -- void const* src, size_t srcSize, -- int forceNonContiguous) -+MEM_STATIC -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+U32 ZSTD_window_update(ZSTD_window_t* window, -+ const void* src, size_t srcSize, -+ int forceNonContiguous) - { - BYTE const* const ip = (BYTE const*)src; - U32 contiguous = 1; -@@ -1228,8 +1372,9 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_w - /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ - if ( (ip+srcSize > window->dictBase + window->lowLimit) - & (ip < window->dictBase + window->dictLimit)) { -- ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase; -- U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; -+ size_t const highInputIdx = (size_t)((ip + srcSize) - window->dictBase); -+ U32 const lowLimitMax = (highInputIdx > (size_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; -+ assert(highInputIdx < UINT_MAX); - window->lowLimit = lowLimitMax; - DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit); - } -@@ -1239,7 +1384,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_w - /* - * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. - */ --MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) -+MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog) - { - U32 const maxDistance = 1U << windowLog; - U32 const lowestValid = ms->window.lowLimit; -@@ -1256,7 +1401,7 @@ MEM_STATIC U32 ZSTD_getLowestMatchIndex( - /* - * Returns the lowest allowed match index in the prefix. 
- */ --MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) -+MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog) - { - U32 const maxDistance = 1U << windowLog; - U32 const lowestValid = ms->window.dictLimit; -@@ -1269,6 +1414,13 @@ MEM_STATIC U32 ZSTD_getLowestPrefixIndex - return matchLowest; - } - -+/* index_safety_check: -+ * intentional underflow : ensure repIndex isn't overlapping dict + prefix -+ * @return 1 if values are not overlapping, -+ * 0 otherwise */ -+MEM_STATIC int ZSTD_index_overlap_check(const U32 prefixLowestIndex, const U32 repIndex) { -+ return ((U32)((prefixLowestIndex-1) - repIndex) >= 3); -+} - - - /* debug functions */ -@@ -1302,7 +1454,42 @@ MEM_STATIC void ZSTD_debugTable(const U3 - - #endif - -+/* Short Cache */ - -+/* Normally, zstd matchfinders follow this flow: -+ * 1. Compute hash at ip -+ * 2. Load index from hashTable[hash] -+ * 3. Check if *ip == *(base + index) -+ * In dictionary compression, loading *(base + index) is often an L2 or even L3 miss. -+ * -+ * Short cache is an optimization which allows us to avoid step 3 most of the time -+ * when the data doesn't actually match. With short cache, the flow becomes: -+ * 1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip. -+ * 2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works. -+ * 3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue. -+ * -+ * Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to -+ * dictMatchState matchfinders. -+ */ -+#define ZSTD_SHORT_CACHE_TAG_BITS 8 -+#define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1) -+ -+/* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable. -+ * Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */ -+MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) { -+ size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS; -+ U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK); -+ assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0); -+ hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag; -+} -+ -+/* Helper function for short cache matchfinders. -+ * Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. 
*/ -+MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) { -+ U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK; -+ U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK; -+ return tag1 == tag2; -+} - - /* =============================================================== - * Shared internal declarations -@@ -1319,6 +1506,25 @@ size_t ZSTD_loadCEntropy(ZSTD_compressed - - void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); - -+typedef struct { -+ U32 idx; /* Index in array of ZSTD_Sequence */ -+ U32 posInSequence; /* Position within sequence at idx */ -+ size_t posInSrc; /* Number of bytes given by sequences provided so far */ -+} ZSTD_SequencePosition; -+ -+/* for benchmark */ -+size_t ZSTD_convertBlockSequences(ZSTD_CCtx* cctx, -+ const ZSTD_Sequence* const inSeqs, size_t nbSequences, -+ int const repcodeResolution); -+ -+typedef struct { -+ size_t nbSequences; -+ size_t blockSize; -+ size_t litSize; -+} BlockSummary; -+ -+BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs); -+ - /* ============================================================== - * Private declarations - * These prototypes shall only be called from within lib/compress -@@ -1330,7 +1536,7 @@ void ZSTD_reset_compressedBlockState(ZST - * Note: srcSizeHint == 0 means 0! - */ - ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( -- const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); -+ const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode); - - /*! ZSTD_initCStream_internal() : - * Private use only. Init streaming operation. -@@ -1342,7 +1548,7 @@ size_t ZSTD_initCStream_internal(ZSTD_CS - const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); - --void ZSTD_resetSeqStore(seqStore_t* ssPtr); -+void ZSTD_resetSeqStore(SeqStore_t* ssPtr); - - /*! ZSTD_getCParamsFromCDict() : - * as the name implies */ -@@ -1381,11 +1587,10 @@ size_t ZSTD_writeLastEmptyBlock(void* ds - * This cannot be used when long range matching is enabled. - * Zstd will use these sequences, and pass the literals to a secondary block - * compressor. -- * @return : An error code on failure. - * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory - * access and data corruption. - */ --size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); -+void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); - - /* ZSTD_cycleLog() : - * condition for correct operation : hashLog > 1 */ -@@ -1396,4 +1601,28 @@ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_stra - */ - void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); - -+/* Returns 1 if an external sequence producer is registered, otherwise returns 0. */ -+MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) { -+ return params->extSeqProdFunc != NULL; -+} -+ -+/* =============================================================== -+ * Deprecated definitions that are still used internally to avoid -+ * deprecation warnings. These functions are exactly equivalent to -+ * their public variants, but avoid the deprecation warnings. 
-+ * =============================================================== */
-+
-+size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
-+
-+size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
-+ void* dst, size_t dstCapacity,
-+ const void* src, size_t srcSize);
-+
-+size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
-+ void* dst, size_t dstCapacity,
-+ const void* src, size_t srcSize);
-+
-+size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-+
-+
- #endif /* ZSTD_COMPRESS_H */
---- a/lib/zstd/compress/zstd_compress_literals.c
-+++ b/lib/zstd/compress/zstd_compress_literals.c
-@@ -1,5 +1,6 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
-@@ -13,11 +14,36 @@
- ***************************************/
- #include "zstd_compress_literals.h"
- 
-+
-+/* **************************************************************
-+* Debug Traces
-+****************************************************************/
-+#if DEBUGLEVEL >= 2
-+
-+static size_t showHexa(const void* src, size_t srcSize)
-+{
-+ const BYTE* const ip = (const BYTE*)src;
-+ size_t u;
-+ for (u=0; u<srcSize; u++) {
-+ RAWLOG(5, " %02X", ip[u]);
-+ }
-+ RAWLOG(5, " \n");
-+ return srcSize;
-+}
-+
-+#endif
-+
-+
-+/* **************************************************************
-+* Literals compression - special cases
-+****************************************************************/
- size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
- {
- BYTE* const ostart = (BYTE*)dst;
- U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
- 
-+ DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
-+
- RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
- 
- switch(flSize)
-@@ -36,16 +62,30 @@ size_t ZSTD_noCompressLiterals (void* ds
- }
- 
- ZSTD_memcpy(ostart + flSize, src, srcSize);
-- DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
-+ DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
- return srcSize + flSize;
- }
- 
-+static int allBytesIdentical(const void* src, size_t srcSize)
-+{
-+ assert(srcSize >= 1);
-+ assert(src != NULL);
-+ { const BYTE b = ((const BYTE*)src)[0];
-+ size_t p;
-+ for (p=1; p<srcSize; p++) {
-+ if (((const BYTE*)src)[p] != b) return 0;
-+ }
-+ return 1;
-+ }
-+}
-+
- size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
- {
- BYTE* const ostart = (BYTE*)dst;
- U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
- 
-- (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
-+ assert(dstCapacity >= 4); (void)dstCapacity;
-+ assert(allBytesIdentical(src, srcSize));
- 
- switch(flSize)
-@@ -63,28 +103,51 @@ size_t ZSTD_compressRleLiteralsBlock (vo
- }
- 
- ostart[flSize] = *(const BYTE*)src;
-- DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
-+ DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
- return flSize+1;
- }
- 
--size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
-- ZSTD_hufCTables_t* nextHuf,
-- ZSTD_strategy strategy, int disableLiteralCompression,
-- void* dst, size_t dstCapacity,
-- const void* src, size_t srcSize,
-- void* entropyWorkspace, size_t entropyWorkspaceSize,
-- const int bmi2,
-- unsigned suspectUncompressible)
-+/* ZSTD_minLiteralsToCompress() :
-+ * returns minimal amount of literals
-+ * for literal compression to even be attempted.
-+ * Minimum is made tighter as compression strategy increases.
-+ */ -+static size_t -+ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat) -+{ -+ assert((int)strategy >= 0); -+ assert((int)strategy <= 9); -+ /* btultra2 : min 8 bytes; -+ * then 2x larger for each successive compression strategy -+ * max threshold 64 bytes */ -+ { int const shift = MIN(9-(int)strategy, 3); -+ size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift; -+ DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc); -+ return mintc; -+ } -+} -+ -+size_t ZSTD_compressLiterals ( -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize, -+ void* entropyWorkspace, size_t entropyWorkspaceSize, -+ const ZSTD_hufCTables_t* prevHuf, -+ ZSTD_hufCTables_t* nextHuf, -+ ZSTD_strategy strategy, -+ int disableLiteralCompression, -+ int suspectUncompressible, -+ int bmi2) - { -- size_t const minGain = ZSTD_minGain(srcSize, strategy); - size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); - BYTE* const ostart = (BYTE*)dst; - U32 singleStream = srcSize < 256; -- symbolEncodingType_e hType = set_compressed; -+ SymbolEncodingType_e hType = set_compressed; - size_t cLitSize; - -- DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)", -- disableLiteralCompression, (U32)srcSize); -+ DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)", -+ disableLiteralCompression, (U32)srcSize, dstCapacity); -+ -+ DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize)); - - /* Prepare nextEntropy assuming reusing the existing table */ - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); -@@ -92,40 +155,51 @@ size_t ZSTD_compressLiterals (ZSTD_hufCT - if (disableLiteralCompression) - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - -- /* small ? don't even attempt compression (speed opt) */ --# define COMPRESS_LITERALS_SIZE_MIN 63 -- { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; -- if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); -- } -+ /* if too small, don't even attempt compression (speed opt) */ -+ if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode)) -+ return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - - RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); - { HUF_repeat repeat = prevHuf->repeatMode; -- int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; -+ int const flags = 0 -+ | (bmi2 ? HUF_flags_bmi2 : 0) -+ | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0) -+ | (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0) -+ | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0); -+ -+ typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int); -+ huf_compress_f huf_compress; - if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; -- cLitSize = singleStream ? 
-- HUF_compress1X_repeat( -- ostart+lhSize, dstCapacity-lhSize, src, srcSize, -- HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, -- (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) : -- HUF_compress4X_repeat( -- ostart+lhSize, dstCapacity-lhSize, src, srcSize, -- HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, -- (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible); -+ huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat; -+ cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize, -+ src, srcSize, -+ HUF_SYMBOLVALUE_MAX, LitHufLog, -+ entropyWorkspace, entropyWorkspaceSize, -+ (HUF_CElt*)nextHuf->CTable, -+ &repeat, flags); -+ DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize); - if (repeat != HUF_repeat_none) { - /* reused the existing table */ -- DEBUGLOG(5, "Reusing previous huffman table"); -+ DEBUGLOG(5, "reusing statistics from previous huffman block"); - hType = set_repeat; - } - } - -- if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { -- ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); -- return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); -- } -+ { size_t const minGain = ZSTD_minGain(srcSize, strategy); -+ if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { -+ ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); -+ return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); -+ } } - if (cLitSize==1) { -- ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); -- return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); -- } -+ /* A return value of 1 signals that the alphabet consists of a single symbol. -+ * However, in some rare circumstances, it could be the compressed size (a single byte). -+ * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`. -+ * (it's also necessary to not generate statistics). -+ * Therefore, in such a case, actively check that all bytes are identical. */ -+ if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) { -+ ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); -+ return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); -+ } } - - if (hType == set_compressed) { - /* using a newly constructed table */ -@@ -136,16 +210,19 @@ size_t ZSTD_compressLiterals (ZSTD_hufCT - switch(lhSize) - { - case 3: /* 2 - 2 - 10 - 10 */ -- { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); -+ if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); -+ { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); - MEM_writeLE24(ostart, lhc); - break; - } - case 4: /* 2 - 2 - 14 - 14 */ -+ assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); - { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); - MEM_writeLE32(ostart, lhc); - break; - } - case 5: /* 2 - 2 - 18 - 18 */ -+ assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); - { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); - MEM_writeLE32(ostart, lhc); - ostart[4] = (BYTE)(cLitSize >> 10); ---- a/lib/zstd/compress/zstd_compress_literals.h -+++ b/lib/zstd/compress/zstd_compress_literals.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -16,16 +17,24 @@ - - size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); - -+/* ZSTD_compressRleLiteralsBlock() : -+ * Conditions : -+ * - All bytes in @src are identical -+ * - dstCapacity >= 4 */ - size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); - --/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ --size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, -- ZSTD_hufCTables_t* nextHuf, -- ZSTD_strategy strategy, int disableLiteralCompression, -- void* dst, size_t dstCapacity, -+/* ZSTD_compressLiterals(): -+ * @entropyWorkspace: must be aligned on 4-bytes boundaries -+ * @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE -+ * @suspectUncompressible: sampling checks, to potentially skip huffman coding -+ */ -+size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - void* entropyWorkspace, size_t entropyWorkspaceSize, -- const int bmi2, -- unsigned suspectUncompressible); -+ const ZSTD_hufCTables_t* prevHuf, -+ ZSTD_hufCTables_t* nextHuf, -+ ZSTD_strategy strategy, int disableLiteralCompression, -+ int suspectUncompressible, -+ int bmi2); - - #endif /* ZSTD_COMPRESS_LITERALS_H */ ---- a/lib/zstd/compress/zstd_compress_sequences.c -+++ b/lib/zstd/compress/zstd_compress_sequences.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -58,7 +59,7 @@ static unsigned ZSTD_useLowProbCount(siz - { - /* Heuristic: This should cover most blocks <= 16K and - * start to fade out after 16K to about 32K depending on -- * comprssibility. -+ * compressibility. - */ - return nbSeq >= 2048; - } -@@ -153,20 +154,20 @@ size_t ZSTD_crossEntropyCost(short const - return cost >> 8; - } - --symbolEncodingType_e -+SymbolEncodingType_e - ZSTD_selectEncodingType( - FSE_repeat* repeatMode, unsigned const* count, unsigned const max, - size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, - FSE_CTable const* prevCTable, - short const* defaultNorm, U32 defaultNormLog, -- ZSTD_defaultPolicy_e const isDefaultAllowed, -+ ZSTD_DefaultPolicy_e const isDefaultAllowed, - ZSTD_strategy const strategy) - { - ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); - if (mostFrequent == nbSeq) { - *repeatMode = FSE_repeat_none; - if (isDefaultAllowed && nbSeq <= 2) { -- /* Prefer set_basic over set_rle when there are 2 or less symbols, -+ /* Prefer set_basic over set_rle when there are 2 or fewer symbols, - * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. - * If basic encoding isn't possible, always choose RLE. 
- */ -@@ -241,7 +242,7 @@ typedef struct { - - size_t - ZSTD_buildCTable(void* dst, size_t dstCapacity, -- FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, -+ FSE_CTable* nextCTable, U32 FSELog, SymbolEncodingType_e type, - unsigned* count, U32 max, - const BYTE* codeTable, size_t nbSeq, - const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, -@@ -293,7 +294,7 @@ ZSTD_encodeSequences_body( - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, -- seqDef const* sequences, size_t nbSeq, int longOffsets) -+ SeqDef const* sequences, size_t nbSeq, int longOffsets) - { - BIT_CStream_t blockStream; - FSE_CState_t stateMatchLength; -@@ -387,7 +388,7 @@ ZSTD_encodeSequences_default( - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, -- seqDef const* sequences, size_t nbSeq, int longOffsets) -+ SeqDef const* sequences, size_t nbSeq, int longOffsets) - { - return ZSTD_encodeSequences_body(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, -@@ -405,7 +406,7 @@ ZSTD_encodeSequences_bmi2( - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, -- seqDef const* sequences, size_t nbSeq, int longOffsets) -+ SeqDef const* sequences, size_t nbSeq, int longOffsets) - { - return ZSTD_encodeSequences_body(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, -@@ -421,7 +422,7 @@ size_t ZSTD_encodeSequences( - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, -- seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) -+ SeqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) - { - DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); - #if DYNAMIC_BMI2 ---- a/lib/zstd/compress/zstd_compress_sequences.h -+++ b/lib/zstd/compress/zstd_compress_sequences.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -11,26 +12,27 @@ - #ifndef ZSTD_COMPRESS_SEQUENCES_H - #define ZSTD_COMPRESS_SEQUENCES_H - -+#include "zstd_compress_internal.h" /* SeqDef */ - #include "../common/fse.h" /* FSE_repeat, FSE_CTable */ --#include "../common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */ -+#include "../common/zstd_internal.h" /* SymbolEncodingType_e, ZSTD_strategy */ - - typedef enum { - ZSTD_defaultDisallowed = 0, - ZSTD_defaultAllowed = 1 --} ZSTD_defaultPolicy_e; -+} ZSTD_DefaultPolicy_e; - --symbolEncodingType_e -+SymbolEncodingType_e - ZSTD_selectEncodingType( - FSE_repeat* repeatMode, unsigned const* count, unsigned const max, - size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, - FSE_CTable const* prevCTable, - short const* defaultNorm, U32 defaultNormLog, -- ZSTD_defaultPolicy_e const isDefaultAllowed, -+ ZSTD_DefaultPolicy_e const isDefaultAllowed, - ZSTD_strategy const strategy); - - size_t - ZSTD_buildCTable(void* dst, size_t dstCapacity, -- FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, -+ FSE_CTable* nextCTable, U32 FSELog, SymbolEncodingType_e type, - unsigned* count, U32 max, - const BYTE* codeTable, size_t nbSeq, - const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, -@@ -42,7 +44,7 @@ size_t ZSTD_encodeSequences( - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, -- seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); -+ SeqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); - - size_t ZSTD_fseBitCost( - FSE_CTable const* ctable, ---- a/lib/zstd/compress/zstd_compress_superblock.c -+++ b/lib/zstd/compress/zstd_compress_superblock.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -36,13 +37,14 @@ - * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block - * and the following sub-blocks' literals sections will be Treeless_Literals_Block. - * @return : compressed size of literals section of a sub-block -- * Or 0 if it unable to compress. -+ * Or 0 if unable to compress. - * Or error code */ --static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, -- const ZSTD_hufCTablesMetadata_t* hufMetadata, -- const BYTE* literals, size_t litSize, -- void* dst, size_t dstSize, -- const int bmi2, int writeEntropy, int* entropyWritten) -+static size_t -+ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, -+ const ZSTD_hufCTablesMetadata_t* hufMetadata, -+ const BYTE* literals, size_t litSize, -+ void* dst, size_t dstSize, -+ const int bmi2, int writeEntropy, int* entropyWritten) - { - size_t const header = writeEntropy ? 200 : 0; - size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header)); -@@ -50,11 +52,9 @@ static size_t ZSTD_compressSubBlock_lite - BYTE* const oend = ostart + dstSize; - BYTE* op = ostart + lhSize; - U32 const singleStream = lhSize == 3; -- symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; -+ SymbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; - size_t cLitSize = 0; - -- (void)bmi2; /* TODO bmi2... 
*/ -- - DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); - - *entropyWritten = 0; -@@ -76,9 +76,9 @@ static size_t ZSTD_compressSubBlock_lite - DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); - } - -- /* TODO bmi2 */ -- { const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) -- : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); -+ { int const flags = bmi2 ? HUF_flags_bmi2 : 0; -+ const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags) -+ : HUF_compress4X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags); - op += cSize; - cLitSize += cSize; - if (cSize == 0 || ERR_isError(cSize)) { -@@ -103,7 +103,7 @@ static size_t ZSTD_compressSubBlock_lite - switch(lhSize) - { - case 3: /* 2 - 2 - 10 - 10 */ -- { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); -+ { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); - MEM_writeLE24(ostart, lhc); - break; - } -@@ -123,26 +123,30 @@ static size_t ZSTD_compressSubBlock_lite - } - *entropyWritten = 1; - DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart)); -- return op-ostart; -+ return (size_t)(op-ostart); - } - --static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) { -- const seqDef* const sstart = sequences; -- const seqDef* const send = sequences + nbSeq; -- const seqDef* sp = sstart; -+static size_t -+ZSTD_seqDecompressedSize(SeqStore_t const* seqStore, -+ const SeqDef* sequences, size_t nbSeqs, -+ size_t litSize, int lastSubBlock) -+{ - size_t matchLengthSum = 0; - size_t litLengthSum = 0; -- (void)(litLengthSum); /* suppress unused variable warning on some environments */ -- while (send-sp > 0) { -- ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); -+ size_t n; -+ for (n=0; ncParams.windowLog > STREAM_ACCUMULATOR_MIN; - BYTE* const ostart = (BYTE*)dst; -@@ -176,14 +181,14 @@ static size_t ZSTD_compressSubBlock_sequ - /* Sequences Header */ - RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, - dstSize_tooSmall, ""); -- if (nbSeq < 0x7F) -+ if (nbSeq < 128) - *op++ = (BYTE)nbSeq; - else if (nbSeq < LONGNBSEQ) - op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; - else - op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; - if (nbSeq==0) { -- return op - ostart; -+ return (size_t)(op - ostart); - } - - /* seqHead : flags for FSE encoding type */ -@@ -205,7 +210,7 @@ static size_t ZSTD_compressSubBlock_sequ - } - - { size_t const bitstreamSize = ZSTD_encodeSequences( -- op, oend - op, -+ op, (size_t)(oend - op), - fseTables->matchlengthCTable, mlCode, - fseTables->offcodeCTable, ofCode, - fseTables->litlengthCTable, llCode, -@@ -249,7 +254,7 @@ static size_t ZSTD_compressSubBlock_sequ - #endif - - *entropyWritten = 1; -- return op - ostart; -+ return (size_t)(op - ostart); - } - - /* ZSTD_compressSubBlock() : -@@ -258,7 +263,7 @@ static size_t ZSTD_compressSubBlock_sequ - * Or 0 if it failed to compress. 
*/ - static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, - const ZSTD_entropyCTablesMetadata_t* entropyMetadata, -- const seqDef* sequences, size_t nbSeq, -+ const SeqDef* sequences, size_t nbSeq, - const BYTE* literals, size_t litSize, - const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, - const ZSTD_CCtx_params* cctxParams, -@@ -275,7 +280,8 @@ static size_t ZSTD_compressSubBlock(cons - litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock); - { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable, - &entropyMetadata->hufMetadata, literals, litSize, -- op, oend-op, bmi2, writeLitEntropy, litEntropyWritten); -+ op, (size_t)(oend-op), -+ bmi2, writeLitEntropy, litEntropyWritten); - FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed"); - if (cLitSize == 0) return 0; - op += cLitSize; -@@ -285,18 +291,18 @@ static size_t ZSTD_compressSubBlock(cons - sequences, nbSeq, - llCode, mlCode, ofCode, - cctxParams, -- op, oend-op, -+ op, (size_t)(oend-op), - bmi2, writeSeqEntropy, seqEntropyWritten); - FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed"); - if (cSeqSize == 0) return 0; - op += cSeqSize; - } - /* Write block header */ -- { size_t cSize = (op-ostart)-ZSTD_blockHeaderSize; -+ { size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize; - U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - MEM_writeLE24(ostart, cBlockHeader24); - } -- return op-ostart; -+ return (size_t)(op-ostart); - } - - static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, -@@ -322,7 +328,7 @@ static size_t ZSTD_estimateSubBlockSize_ - return 0; - } - --static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, -+static size_t ZSTD_estimateSubBlockSize_symbolType(SymbolEncodingType_e type, - const BYTE* codeTable, unsigned maxCode, - size_t nbSeq, const FSE_CTable* fseCTable, - const U8* additionalBits, -@@ -385,7 +391,11 @@ static size_t ZSTD_estimateSubBlockSize_ - return cSeqSizeEstimate + sequencesSectionHeaderSize; - } - --static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, -+typedef struct { -+ size_t estLitSize; -+ size_t estBlockSize; -+} EstimatedBlockSize; -+static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, - const BYTE* ofCodeTable, - const BYTE* llCodeTable, - const BYTE* mlCodeTable, -@@ -393,15 +403,17 @@ static size_t ZSTD_estimateSubBlockSize( - const ZSTD_entropyCTables_t* entropy, - const ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize, -- int writeLitEntropy, int writeSeqEntropy) { -- size_t cSizeEstimate = 0; -- cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, -- &entropy->huf, &entropyMetadata->hufMetadata, -- workspace, wkspSize, writeLitEntropy); -- cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, -+ int writeLitEntropy, int writeSeqEntropy) -+{ -+ EstimatedBlockSize ebs; -+ ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize, -+ &entropy->huf, &entropyMetadata->hufMetadata, -+ workspace, wkspSize, writeLitEntropy); -+ ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, - nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, - workspace, wkspSize, writeSeqEntropy); -- return cSizeEstimate + ZSTD_blockHeaderSize; -+ ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize; -+ return ebs; - } - 
- static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
-@@ -415,14 +427,57 @@ static int ZSTD_needSequenceEntropyTable
- return 0;
- }
- 
-+static size_t countLiterals(SeqStore_t const* seqStore, const SeqDef* sp, size_t seqCount)
-+{
-+ size_t n, total = 0;
-+ assert(sp != NULL);
-+ for (n=0; n<seqCount; n++) {
-+ total += ZSTD_getSequenceLength(seqStore, sp+n).litLength;
-+ }
-+ DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total);
-+ return total;
-+}
-+
-+#define BYTESCALE 256
-+
-+static size_t sizeBlockSequences(const SeqDef* sp, size_t nbSeqs,
-+ size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
-+ int firstSubBlock)
-+{
-+ size_t n, budget = 0, inSize=0;
-+ /* entropy headers */
-+ size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
-+ assert(firstSubBlock==0 || firstSubBlock==1);
-+ budget += headerSize;
-+
-+ /* first sequence => at least one sequence*/
-+ budget += sp[0].litLength * avgLitCost + avgSeqCost;
-+ if (budget > targetBudget) return 1;
-+ inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
-+
-+ /* loop over sequences */
-+ for (n=1; n<nbSeqs; n++) {
-+ size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
-+ budget += currentCost;
-+ inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
-+ /* stop when sub-block budget is reached */
-+ if ( (budget > targetBudget)
-+ /* though continue to expand until the sub-block is deemed compressible */
-+ && (budget < inSize * BYTESCALE) )
-+ break;
-+ }
-+
-+ return n;
-+}
-+
- /* ZSTD_compressSubBlock_multi() :
- * Breaks super-block into multiple sub-blocks and compresses them.
-- * Entropy will be written to the first block.
-- * The following blocks will use repeat mode to compress.
-- * All sub-blocks are compressed blocks (no raw or rle blocks).
-- * @return : compressed size of the super block (which is multiple ZSTD blocks)
-- * Or 0 if it failed to compress. */
--static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
-+ * Entropy will be written into the first block.
-+ * The following blocks use repeat_mode to compress.
-+ * Sub-blocks are all compressed, except the last one when beneficial.
-+ * @return : compressed size of the super block (which features multiple ZSTD blocks)
-+ * or 0 if it failed to compress.
*/ -+static size_t ZSTD_compressSubBlock_multi(const SeqStore_t* seqStorePtr, - const ZSTD_compressedBlockState_t* prevCBlock, - ZSTD_compressedBlockState_t* nextCBlock, - const ZSTD_entropyCTablesMetadata_t* entropyMetadata, -@@ -432,12 +487,14 @@ static size_t ZSTD_compressSubBlock_mult - const int bmi2, U32 lastBlock, - void* workspace, size_t wkspSize) - { -- const seqDef* const sstart = seqStorePtr->sequencesStart; -- const seqDef* const send = seqStorePtr->sequences; -- const seqDef* sp = sstart; -+ const SeqDef* const sstart = seqStorePtr->sequencesStart; -+ const SeqDef* const send = seqStorePtr->sequences; -+ const SeqDef* sp = sstart; /* tracks progresses within seqStorePtr->sequences */ -+ size_t const nbSeqs = (size_t)(send - sstart); - const BYTE* const lstart = seqStorePtr->litStart; - const BYTE* const lend = seqStorePtr->lit; - const BYTE* lp = lstart; -+ size_t const nbLiterals = (size_t)(lend - lstart); - BYTE const* ip = (BYTE const*)src; - BYTE const* const iend = ip + srcSize; - BYTE* const ostart = (BYTE*)dst; -@@ -446,112 +503,171 @@ static size_t ZSTD_compressSubBlock_mult - const BYTE* llCodePtr = seqStorePtr->llCode; - const BYTE* mlCodePtr = seqStorePtr->mlCode; - const BYTE* ofCodePtr = seqStorePtr->ofCode; -- size_t targetCBlockSize = cctxParams->targetCBlockSize; -- size_t litSize, seqCount; -- int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed; -+ size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */ -+ size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize); -+ int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed); - int writeSeqEntropy = 1; -- int lastSequence = 0; - -- DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)", -- (unsigned)(lend-lp), (unsigned)(send-sstart)); -+ DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)", -+ (unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart)); - -- litSize = 0; -- seqCount = 0; -- do { -- size_t cBlockSizeEstimate = 0; -- if (sstart == send) { -- lastSequence = 1; -- } else { -- const seqDef* const sequence = sp + seqCount; -- lastSequence = sequence == send - 1; -- litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength; -- seqCount++; -- } -- if (lastSequence) { -- assert(lp <= lend); -- assert(litSize <= (size_t)(lend - lp)); -- litSize = (size_t)(lend - lp); -- } -- /* I think there is an optimization opportunity here. -- * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful -- * since it recalculates estimate from scratch. -- * For example, it would recount literal distribution and symbol codes every time. 
-- */ -- cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, -- &nextCBlock->entropy, entropyMetadata, -- workspace, wkspSize, writeLitEntropy, writeSeqEntropy); -- if (cBlockSizeEstimate > targetCBlockSize || lastSequence) { -- int litEntropyWritten = 0; -- int seqEntropyWritten = 0; -- const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence); -- const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, -- sp, seqCount, -- lp, litSize, -- llCodePtr, mlCodePtr, ofCodePtr, -- cctxParams, -- op, oend-op, -- bmi2, writeLitEntropy, writeSeqEntropy, -- &litEntropyWritten, &seqEntropyWritten, -- lastBlock && lastSequence); -- FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); -- if (cSize > 0 && cSize < decompressedSize) { -- DEBUGLOG(5, "Committed the sub-block"); -- assert(ip + decompressedSize <= iend); -- ip += decompressedSize; -- sp += seqCount; -- lp += litSize; -- op += cSize; -- llCodePtr += seqCount; -- mlCodePtr += seqCount; -- ofCodePtr += seqCount; -- litSize = 0; -- seqCount = 0; -- /* Entropy only needs to be written once */ -- if (litEntropyWritten) { -- writeLitEntropy = 0; -- } -- if (seqEntropyWritten) { -- writeSeqEntropy = 0; -- } -+ /* let's start by a general estimation for the full block */ -+ if (nbSeqs > 0) { -+ EstimatedBlockSize const ebs = -+ ZSTD_estimateSubBlockSize(lp, nbLiterals, -+ ofCodePtr, llCodePtr, mlCodePtr, nbSeqs, -+ &nextCBlock->entropy, entropyMetadata, -+ workspace, wkspSize, -+ writeLitEntropy, writeSeqEntropy); -+ /* quick estimation */ -+ size_t const avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE; -+ size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs; -+ const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1); -+ size_t n, avgBlockBudget, blockBudgetSupp=0; -+ avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks; -+ DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes", -+ (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE, -+ (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE); -+ /* simplification: if estimates states that the full superblock doesn't compress, just bail out immediately -+ * this will result in the production of a single uncompressed block covering @srcSize.*/ -+ if (ebs.estBlockSize > srcSize) return 0; -+ -+ /* compress and write sub-blocks */ -+ assert(nbSubBlocks>0); -+ for (n=0; n < nbSubBlocks-1; n++) { -+ /* determine nb of sequences for current sub-block + nbLiterals from next sequence */ -+ size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp), -+ avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0); -+ /* if reached last sequence : break to last sub-block (simplification) */ -+ assert(seqCount <= (size_t)(send-sp)); -+ if (sp + seqCount == send) break; -+ assert(seqCount > 0); -+ /* compress sub-block */ -+ { int litEntropyWritten = 0; -+ int seqEntropyWritten = 0; -+ size_t litSize = countLiterals(seqStorePtr, sp, seqCount); -+ const size_t decompressedSize = -+ ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0); -+ size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, -+ sp, seqCount, -+ lp, litSize, -+ llCodePtr, mlCodePtr, ofCodePtr, -+ cctxParams, 
-+ op, (size_t)(oend-op), -+ bmi2, writeLitEntropy, writeSeqEntropy, -+ &litEntropyWritten, &seqEntropyWritten, -+ 0); -+ FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); -+ -+ /* check compressibility, update state components */ -+ if (cSize > 0 && cSize < decompressedSize) { -+ DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes", -+ (unsigned)decompressedSize, (unsigned)cSize); -+ assert(ip + decompressedSize <= iend); -+ ip += decompressedSize; -+ lp += litSize; -+ op += cSize; -+ llCodePtr += seqCount; -+ mlCodePtr += seqCount; -+ ofCodePtr += seqCount; -+ /* Entropy only needs to be written once */ -+ if (litEntropyWritten) { -+ writeLitEntropy = 0; -+ } -+ if (seqEntropyWritten) { -+ writeSeqEntropy = 0; -+ } -+ sp += seqCount; -+ blockBudgetSupp = 0; -+ } } -+ /* otherwise : do not compress yet, coalesce current sub-block with following one */ -+ } -+ } /* if (nbSeqs > 0) */ -+ -+ /* write last block */ -+ DEBUGLOG(5, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp)); -+ { int litEntropyWritten = 0; -+ int seqEntropyWritten = 0; -+ size_t litSize = (size_t)(lend - lp); -+ size_t seqCount = (size_t)(send - sp); -+ const size_t decompressedSize = -+ ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1); -+ size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, -+ sp, seqCount, -+ lp, litSize, -+ llCodePtr, mlCodePtr, ofCodePtr, -+ cctxParams, -+ op, (size_t)(oend-op), -+ bmi2, writeLitEntropy, writeSeqEntropy, -+ &litEntropyWritten, &seqEntropyWritten, -+ lastBlock); -+ FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); -+ -+ /* update pointers, the nb of literals borrowed from next sequence must be preserved */ -+ if (cSize > 0 && cSize < decompressedSize) { -+ DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes", -+ (unsigned)decompressedSize, (unsigned)cSize); -+ assert(ip + decompressedSize <= iend); -+ ip += decompressedSize; -+ lp += litSize; -+ op += cSize; -+ llCodePtr += seqCount; -+ mlCodePtr += seqCount; -+ ofCodePtr += seqCount; -+ /* Entropy only needs to be written once */ -+ if (litEntropyWritten) { -+ writeLitEntropy = 0; - } -+ if (seqEntropyWritten) { -+ writeSeqEntropy = 0; -+ } -+ sp += seqCount; - } -- } while (!lastSequence); -+ } -+ -+ - if (writeLitEntropy) { -- DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten"); -+ DEBUGLOG(5, "Literal entropy tables were never written"); - ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); - } - if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { - /* If we haven't written our entropy tables, then we've violated our contract and - * must emit an uncompressed block. 
- */ -- DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten"); -+ DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block"); - return 0; - } -+ - if (ip < iend) { -- size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock); -- DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip)); -+ /* some data left : last part of the block sent uncompressed */ -+ size_t const rSize = (size_t)((iend - ip)); -+ size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock); -+ DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize)); - FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); - assert(cSize != 0); - op += cSize; - /* We have to regenerate the repcodes because we've skipped some sequences */ - if (sp < send) { -- seqDef const* seq; -- repcodes_t rep; -+ const SeqDef* seq; -+ Repcodes_t rep; - ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep)); - for (seq = sstart; seq < sp; ++seq) { -- ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); -+ ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); - } - ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep)); - } - } -- DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed"); -- return op-ostart; -+ -+ DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u", -+ (unsigned)(op-ostart)); -+ return (size_t)(op-ostart); - } - - size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, -- void const* src, size_t srcSize, -- unsigned lastBlock) { -+ const void* src, size_t srcSize, -+ unsigned lastBlock) -+{ - ZSTD_entropyCTablesMetadata_t entropyMetadata; - - FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore, -@@ -559,7 +675,7 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx - &zc->blockState.nextCBlock->entropy, - &zc->appliedParams, - &entropyMetadata, -- zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); -+ zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */), ""); - - return ZSTD_compressSubBlock_multi(&zc->seqStore, - zc->blockState.prevCBlock, -@@ -569,5 +685,5 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx - dst, dstCapacity, - src, srcSize, - zc->bmi2, lastBlock, -- zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */); -+ zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */); - } ---- a/lib/zstd/compress/zstd_compress_superblock.h -+++ b/lib/zstd/compress/zstd_compress_superblock.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the ---- a/lib/zstd/compress/zstd_cwksp.h -+++ b/lib/zstd/compress/zstd_cwksp.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -14,8 +15,10 @@ - /*-************************************* - * Dependencies - ***************************************/ -+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */ - #include "../common/zstd_internal.h" -- -+#include "../common/portability_macros.h" -+#include "../common/compiler.h" /* ZS2_isPower2 */ - - /*-************************************* - * Constants -@@ -41,8 +44,9 @@ - ***************************************/ - typedef enum { - ZSTD_cwksp_alloc_objects, -- ZSTD_cwksp_alloc_buffers, -- ZSTD_cwksp_alloc_aligned -+ ZSTD_cwksp_alloc_aligned_init_once, -+ ZSTD_cwksp_alloc_aligned, -+ ZSTD_cwksp_alloc_buffers - } ZSTD_cwksp_alloc_phase_e; - - /* -@@ -95,8 +99,8 @@ typedef enum { - * - * Workspace Layout: - * -- * [ ... workspace ... ] -- * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] -+ * [ ... workspace ... ] -+ * [objects][tables ->] free space [<- buffers][<- aligned][<- init once] - * - * The various objects that live in the workspace are divided into the - * following categories, and are allocated separately: -@@ -120,9 +124,18 @@ typedef enum { - * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). - * Their sizes depend on the cparams. These tables are 64-byte aligned. - * -- * - Aligned: these buffers are used for various purposes that require 4 byte -- * alignment, but don't require any initialization before they're used. These -- * buffers are each aligned to 64 bytes. -+ * - Init once: these buffers require to be initialized at least once before -+ * use. They should be used when we want to skip memory initialization -+ * while not triggering memory checkers (like Valgrind) when reading from -+ * from this memory without writing to it first. -+ * These buffers should be used carefully as they might contain data -+ * from previous compressions. -+ * Buffers are aligned to 64 bytes. -+ * -+ * - Aligned: these buffers don't require any initialization before they're -+ * used. The user of the buffer should make sure they write into a buffer -+ * location before reading from it. -+ * Buffers are aligned to 64 bytes. - * - * - Buffers: these buffers are used for various purposes that don't require - * any alignment or initialization before they're used. This means they can -@@ -134,8 +147,9 @@ typedef enum { - * correctly packed into the workspace buffer. That order is: - * - * 1. Objects -- * 2. Buffers -- * 3. Aligned/Tables -+ * 2. Init once / Tables -+ * 3. Aligned / Tables -+ * 4. Buffers / Tables - * - * Attempts to reserve objects of different types out of order will fail. - */ -@@ -147,6 +161,7 @@ typedef struct { - void* tableEnd; - void* tableValidEnd; - void* allocStart; -+ void* initOnceStart; - - BYTE allocFailed; - int workspaceOversizedDuration; -@@ -159,6 +174,7 @@ typedef struct { - ***************************************/ - - MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); -+MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws); - - MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { - (void)ws; -@@ -168,14 +184,16 @@ MEM_STATIC void ZSTD_cwksp_assert_intern - assert(ws->tableEnd <= ws->allocStart); - assert(ws->tableValidEnd <= ws->allocStart); - assert(ws->allocStart <= ws->workspaceEnd); -+ assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws)); -+ assert(ws->workspace <= ws->initOnceStart); - } - - /* - * Align must be a power of 2. 
- */ --MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { -+MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t align) { - size_t const mask = align - 1; -- assert((align & mask) == 0); -+ assert(ZSTD_isPower2(align)); - return (size + mask) & ~mask; - } - -@@ -189,7 +207,7 @@ MEM_STATIC size_t ZSTD_cwksp_align(size_ - * to figure out how much space you need for the matchState tables. Everything - * else is though. - * -- * Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size(). -+ * Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned64_alloc_size(). - */ - MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { - if (size == 0) -@@ -197,12 +215,16 @@ MEM_STATIC size_t ZSTD_cwksp_alloc_size( - return size; - } - -+MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size, size_t alignment) { -+ return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, alignment)); -+} -+ - /* - * Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes. - * Used to determine the number of bytes required for a given "aligned". - */ --MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) { -- return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES)); -+MEM_STATIC size_t ZSTD_cwksp_aligned64_alloc_size(size_t size) { -+ return ZSTD_cwksp_aligned_alloc_size(size, ZSTD_CWKSP_ALIGNMENT_BYTES); - } - - /* -@@ -210,14 +232,10 @@ MEM_STATIC size_t ZSTD_cwksp_aligned_all - * for internal purposes (currently only alignment). - */ - MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) { -- /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes -- * to align the beginning of tables section, as well as another n_2=[0, 63] bytes -- * to align the beginning of the aligned section. -- * -- * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and -- * aligneds being sized in multiples of 64 bytes. -+ /* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES -+ * bytes to align the beginning of tables section and end of buffers; - */ -- size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES; -+ size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2; - return slackSpace; - } - -@@ -229,12 +247,24 @@ MEM_STATIC size_t ZSTD_cwksp_slack_space - MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) { - size_t const alignBytesMask = alignBytes - 1; - size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask; -- assert((alignBytes & alignBytesMask) == 0); -- assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES); -+ assert(ZSTD_isPower2(alignBytes)); -+ assert(bytes < alignBytes); - return bytes; - } - - /* -+ * Returns the initial value for allocStart which is used to determine the position from -+ * which we can allocate from the end of the workspace. -+ */ -+MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) -+{ -+ char* endPtr = (char*)ws->workspaceEnd; -+ assert(ZSTD_isPower2(ZSTD_CWKSP_ALIGNMENT_BYTES)); -+ endPtr = endPtr - ((size_t)endPtr % ZSTD_CWKSP_ALIGNMENT_BYTES); -+ return (void*)endPtr; -+} -+ -+/* - * Internal function. Do not use directly. - * Reserves the given number of bytes within the aligned/buffer segment of the wksp, - * which counts from the end of the wksp (as opposed to the object/table segment). 
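/*
 * Illustrative sketch (not taken from the patch): the power-of-two round-up
 * that ZSTD_cwksp_align() in the hunk above performs, and that
 * ZSTD_cwksp_aligned_alloc_size() builds on. The helper name round_up is
 * hypothetical; as the patched assert spells out, align must be a power of
 * two, otherwise the mask trick below is invalid.
 */
#include <assert.h>
#include <stddef.h>

static size_t round_up(size_t size, size_t align)
{
    size_t const mask = align - 1;
    assert((align & mask) == 0);   /* power-of-two check, as ZSTD_isPower2() does */
    return (size + mask) & ~mask;  /* round_up(100, 64) == 128, round_up(64, 64) == 64 */
}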
-@@ -246,7 +276,7 @@ ZSTD_cwksp_reserve_internal_buffer_space - { - void* const alloc = (BYTE*)ws->allocStart - bytes; - void* const bottom = ws->tableEnd; -- DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", -+ DEBUGLOG(5, "cwksp: reserving [0x%p]:%zd bytes; %zd bytes remaining", - alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); - ZSTD_cwksp_assert_internal_consistency(ws); - assert(alloc >= bottom); -@@ -274,27 +304,16 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_c - { - assert(phase >= ws->phase); - if (phase > ws->phase) { -- /* Going from allocating objects to allocating buffers */ -- if (ws->phase < ZSTD_cwksp_alloc_buffers && -- phase >= ZSTD_cwksp_alloc_buffers) { -+ /* Going from allocating objects to allocating initOnce / tables */ -+ if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once && -+ phase >= ZSTD_cwksp_alloc_aligned_init_once) { - ws->tableValidEnd = ws->objectEnd; -- } -+ ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws); - -- /* Going from allocating buffers to allocating aligneds/tables */ -- if (ws->phase < ZSTD_cwksp_alloc_aligned && -- phase >= ZSTD_cwksp_alloc_aligned) { -- { /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */ -- size_t const bytesToAlign = -- ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES); -- DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign); -- ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */ -- RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign), -- memory_allocation, "aligned phase - alignment initial allocation failed!"); -- } - { /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */ -- void* const alloc = ws->objectEnd; -+ void *const alloc = ws->objectEnd; - size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES); -- void* const objectEnd = (BYTE*)alloc + bytesToAlign; -+ void *const objectEnd = (BYTE *) alloc + bytesToAlign; - DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign); - RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation, - "table phase - alignment initial allocation failed!"); -@@ -302,7 +321,9 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_c - ws->tableEnd = objectEnd; /* table area starts being empty */ - if (ws->tableValidEnd < ws->tableEnd) { - ws->tableValidEnd = ws->tableEnd; -- } } } -+ } -+ } -+ } - ws->phase = phase; - ZSTD_cwksp_assert_internal_consistency(ws); - } -@@ -314,7 +335,7 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_c - */ - MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) - { -- return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); -+ return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd); - } - - /* -@@ -345,29 +366,61 @@ MEM_STATIC BYTE* ZSTD_cwksp_reserve_buff - - /* - * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes). -+ * This memory has been initialized at least once in the past. -+ * This doesn't mean it has been initialized this time, and it might contain data from previous -+ * operations. -+ * The main usage is for algorithms that might need read access into uninitialized memory. -+ * The algorithm must maintain safety under these conditions and must make sure it doesn't -+ * leak any of the past data (directly or in side channels). 
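/*
 * Illustrative sketch (hypothetical type and helper names, not the kernel
 * API): a simplified model of the "init once" reservation documented above.
 * Allocations grow downward from the end of the arena; only the part of a
 * new reservation that lies below the lowest previously-initialized address
 * is zeroed, so callers get memory that has been initialized at least once
 * but may still hold bytes from earlier compressions. Alignment handling is
 * omitted here for brevity.
 */
#include <stddef.h>
#include <string.h>

typedef struct {
    unsigned char* tableEnd;      /* lower bound for end-of-arena allocations */
    unsigned char* allocStart;    /* current top of the downward-growing region */
    unsigned char* initOnceStart; /* lowest address that has ever been zeroed */
} toy_wksp;

static void* toy_reserve_init_once(toy_wksp* ws, size_t bytes)
{
    unsigned char* const ptr = ws->allocStart - bytes;
    if (ptr < ws->tableEnd)
        return NULL;                                   /* arena exhausted */
    ws->allocStart = ptr;
    if (ptr < ws->initOnceStart) {
        /* zero only the newly exposed prefix, up to the old initOnceStart */
        size_t const fresh = (size_t)(ws->initOnceStart - ptr);
        memset(ptr, 0, fresh < bytes ? fresh : bytes);
        ws->initOnceStart = ptr;
    }
    return ptr;
}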
- */ --MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) -+MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t bytes) - { -- void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES), -- ZSTD_cwksp_alloc_aligned); -- assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); -+ size_t const alignedBytes = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES); -+ void* ptr = ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned_init_once); -+ assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0); -+ if(ptr && ptr < ws->initOnceStart) { -+ /* We assume the memory following the current allocation is either: -+ * 1. Not usable as initOnce memory (end of workspace) -+ * 2. Another initOnce buffer that has been allocated before (and so was previously memset) -+ * 3. An ASAN redzone, in which case we don't want to write on it -+ * For these reasons it should be fine to not explicitly zero every byte up to ws->initOnceStart. -+ * Note that we assume here that MSAN and ASAN cannot run in the same time. */ -+ ZSTD_memset(ptr, 0, MIN((size_t)((U8*)ws->initOnceStart - (U8*)ptr), alignedBytes)); -+ ws->initOnceStart = ptr; -+ } -+ return ptr; -+} -+ -+/* -+ * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes). -+ */ -+MEM_STATIC void* ZSTD_cwksp_reserve_aligned64(ZSTD_cwksp* ws, size_t bytes) -+{ -+ void* const ptr = ZSTD_cwksp_reserve_internal(ws, -+ ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES), -+ ZSTD_cwksp_alloc_aligned); -+ assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0); - return ptr; - } - - /* - * Aligned on 64 bytes. These buffers have the special property that -- * their values remain constrained, allowing us to re-use them without -+ * their values remain constrained, allowing us to reuse them without - * memset()-ing them. - */ - MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) - { -- const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; -+ const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned_init_once; - void* alloc; - void* end; - void* top; - -- if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) { -- return NULL; -+ /* We can only start allocating tables after we are done reserving space for objects at the -+ * start of the workspace */ -+ if(ws->phase < phase) { -+ if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) { -+ return NULL; -+ } - } - alloc = ws->tableEnd; - end = (BYTE *)alloc + bytes; -@@ -387,7 +440,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_tabl - - - assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0); -- assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); -+ assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0); - return alloc; - } - -@@ -421,6 +474,20 @@ MEM_STATIC void* ZSTD_cwksp_reserve_obje - - return alloc; - } -+/* -+ * with alignment control -+ * Note : should happen only once, at workspace first initialization -+ */ -+MEM_STATIC void* ZSTD_cwksp_reserve_object_aligned(ZSTD_cwksp* ws, size_t byteSize, size_t alignment) -+{ -+ size_t const mask = alignment - 1; -+ size_t const surplus = (alignment > sizeof(void*)) ? 
alignment - sizeof(void*) : 0; -+ void* const start = ZSTD_cwksp_reserve_object(ws, byteSize + surplus); -+ if (start == NULL) return NULL; -+ if (surplus == 0) return start; -+ assert(ZSTD_isPower2(alignment)); -+ return (void*)(((size_t)start + surplus) & ~mask); -+} - - MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) - { -@@ -451,7 +518,7 @@ MEM_STATIC void ZSTD_cwksp_clean_tables( - assert(ws->tableValidEnd >= ws->objectEnd); - assert(ws->tableValidEnd <= ws->allocStart); - if (ws->tableValidEnd < ws->tableEnd) { -- ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); -+ ZSTD_memset(ws->tableValidEnd, 0, (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd)); - } - ZSTD_cwksp_mark_tables_clean(ws); - } -@@ -460,7 +527,8 @@ MEM_STATIC void ZSTD_cwksp_clean_tables( - * Invalidates table allocations. - * All other allocations remain valid. - */ --MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { -+MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) -+{ - DEBUGLOG(4, "cwksp: clearing tables!"); - - -@@ -478,14 +546,23 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cw - - - ws->tableEnd = ws->objectEnd; -- ws->allocStart = ws->workspaceEnd; -+ ws->allocStart = ZSTD_cwksp_initialAllocStart(ws); - ws->allocFailed = 0; -- if (ws->phase > ZSTD_cwksp_alloc_buffers) { -- ws->phase = ZSTD_cwksp_alloc_buffers; -+ if (ws->phase > ZSTD_cwksp_alloc_aligned_init_once) { -+ ws->phase = ZSTD_cwksp_alloc_aligned_init_once; - } - ZSTD_cwksp_assert_internal_consistency(ws); - } - -+MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { -+ return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); -+} -+ -+MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) { -+ return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace) -+ + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart); -+} -+ - /* - * The provided workspace takes ownership of the buffer [start, start+size). - * Any existing values in the workspace are ignored (the previously managed -@@ -498,6 +575,7 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwk - ws->workspaceEnd = (BYTE*)start + size; - ws->objectEnd = ws->workspace; - ws->tableValidEnd = ws->objectEnd; -+ ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws); - ws->phase = ZSTD_cwksp_alloc_objects; - ws->isStatic = isStatic; - ZSTD_cwksp_clear(ws); -@@ -529,15 +607,6 @@ MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwk - ZSTD_memset(src, 0, sizeof(ZSTD_cwksp)); - } - --MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { -- return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); --} -- --MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) { -- return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace) -- + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart); --} -- - MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { - return ws->allocFailed; - } -@@ -550,17 +619,11 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed - * Returns if the estimated space needed for a wksp is within an acceptable limit of the - * actual amount of space used. - */ --MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws, -- size_t const estimatedSpace, int resizedWorkspace) { -- if (resizedWorkspace) { -- /* Resized/newly allocated wksp should have exact bounds */ -- return ZSTD_cwksp_used(ws) == estimatedSpace; -- } else { -- /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes -- * than estimatedSpace. See the comments in zstd_cwksp.h for details. 
-- */ -- return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63); -- } -+MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp *const ws, size_t const estimatedSpace) { -+ /* We have an alignment space between objects and tables between tables and buffers, so we can have up to twice -+ * the alignment bytes difference between estimation and actual usage */ -+ return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) && -+ ZSTD_cwksp_used(ws) <= estimatedSpace; - } - - -@@ -591,5 +654,4 @@ MEM_STATIC void ZSTD_cwksp_bump_oversize - } - } - -- - #endif /* ZSTD_CWKSP_H */ ---- a/lib/zstd/compress/zstd_double_fast.c -+++ b/lib/zstd/compress/zstd_double_fast.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -11,8 +12,49 @@ - #include "zstd_compress_internal.h" - #include "zstd_double_fast.h" - -+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR - --void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_fillDoubleHashTableForCDict(ZSTD_MatchState_t* ms, -+ void const* end, ZSTD_dictTableLoadMethod_e dtlm) -+{ -+ const ZSTD_compressionParameters* const cParams = &ms->cParams; -+ U32* const hashLarge = ms->hashTable; -+ U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; -+ U32 const mls = cParams->minMatch; -+ U32* const hashSmall = ms->chainTable; -+ U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS; -+ const BYTE* const base = ms->window.base; -+ const BYTE* ip = base + ms->nextToUpdate; -+ const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; -+ const U32 fastHashFillStep = 3; -+ -+ /* Always insert every fastHashFillStep position into the hash tables. -+ * Insert the other positions into the large hash table if their entry -+ * is empty. 
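/*
 * Illustrative sketch (hypothetical toy table and hash function) of the fill
 * pattern described in the comment just above: every fastHashFillStep-th
 * position is always written, and the positions in between are written only
 * when their slot is still empty, so earlier positions keep priority for
 * those slots. The real code hashes a multi-byte window at each position
 * and, in the CDict variant, stores tagged indices via ZSTD_writeTaggedIndex().
 */
#include <stddef.h>
#include <stdint.h>

enum { FILL_STEP = 3, TOY_TABLE_SIZE = 1u << 12 };

static size_t toy_hash(const uint8_t* p)
{
    return (size_t)(p[0] * 2654435761u) & (TOY_TABLE_SIZE - 1);
}

static void toy_fill(uint32_t* table, const uint8_t* base, size_t begin, size_t end)
{
    size_t pos;
    for (pos = begin; pos + FILL_STEP - 1 <= end; pos += FILL_STEP) {
        size_t i;
        for (i = 0; i < FILL_STEP; ++i) {
            size_t const slot = toy_hash(base + pos + i);
            if (i == 0 || table[slot] == 0)   /* always insert i==0; others only if empty */
                table[slot] = (uint32_t)(pos + i);
        }
    }
}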
-+ */ -+ for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { -+ U32 const curr = (U32)(ip - base); -+ U32 i; -+ for (i = 0; i < fastHashFillStep; ++i) { -+ size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls); -+ size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8); -+ if (i == 0) { -+ ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i); -+ } -+ if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) { -+ ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i); -+ } -+ /* Only load extra positions for ZSTD_dtlm_full */ -+ if (dtlm == ZSTD_dtlm_fast) -+ break; -+ } } -+} -+ -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_fillDoubleHashTableForCCtx(ZSTD_MatchState_t* ms, - void const* end, ZSTD_dictTableLoadMethod_e dtlm) - { - const ZSTD_compressionParameters* const cParams = &ms->cParams; -@@ -43,13 +85,26 @@ void ZSTD_fillDoubleHashTable(ZSTD_match - /* Only load extra positions for ZSTD_dtlm_full */ - if (dtlm == ZSTD_dtlm_fast) - break; -- } } -+ } } -+} -+ -+void ZSTD_fillDoubleHashTable(ZSTD_MatchState_t* ms, -+ const void* const end, -+ ZSTD_dictTableLoadMethod_e dtlm, -+ ZSTD_tableFillPurpose_e tfp) -+{ -+ if (tfp == ZSTD_tfp_forCDict) { -+ ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm); -+ } else { -+ ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm); -+ } - } - - - FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_compressBlock_doubleFast_noDict_generic( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, U32 const mls /* template */) - { - ZSTD_compressionParameters const* cParams = &ms->cParams; -@@ -67,7 +122,7 @@ size_t ZSTD_compressBlock_doubleFast_noD - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=rep[0], offset_2=rep[1]; -- U32 offsetSaved = 0; -+ U32 offsetSaved1 = 0, offsetSaved2 = 0; - - size_t mLength; - U32 offset; -@@ -88,9 +143,14 @@ size_t ZSTD_compressBlock_doubleFast_noD - const BYTE* matchl0; /* the long match for ip */ - const BYTE* matchs0; /* the short match for ip */ - const BYTE* matchl1; /* the long match for ip1 */ -+ const BYTE* matchs0_safe; /* matchs0 or safe address */ - - const BYTE* ip = istart; /* the current position */ - const BYTE* ip1; /* the next position */ -+ /* Array of ~random data, should have low probability of matching data -+ * we load from here instead of from tables, if matchl0/matchl1 are -+ * invalid indices. Used to avoid unpredictable branches. 
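/*
 * Illustrative sketch of the branch-avoidance idea described above; the
 * helper names select_addr and match4_cmov are hypothetical stand-ins for
 * ZSTD_selectAddr and the patched match checks. The address to load from is
 * chosen with a ternary that compilers typically lower to a conditional
 * move, and out-of-range indices read a small dummy array that is very
 * unlikely to equal the input, so the "index in range" test only turns into
 * a branch in the rare case where the 4 bytes actually match.
 */
#include <stdint.h>
#include <string.h>

static const uint8_t k_dummy[8] = { 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0 };

static uint32_t read32(const void* p) { uint32_t v; memcpy(&v, p, sizeof v); return v; }

static const uint8_t* select_addr(uint32_t idx, uint32_t lowLimit,
                                  const uint8_t* inRange, const uint8_t* fallback)
{
    return (idx >= lowLimit) ? inRange : fallback;   /* expected to become a cmov */
}

static int match4_cmov(const uint8_t* ip, const uint8_t* match,
                       uint32_t matchIdx, uint32_t lowLimit)
{
    const uint8_t* const safe = select_addr(matchIdx, lowLimit, match, k_dummy);
    /* compare first, then validate the index; mirrors the ordering used in the patch */
    if (read32(ip) != read32(safe)) return 0;
    return matchIdx >= lowLimit;
}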
*/ -+ const BYTE dummy[] = {0x12,0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0,0xe2,0xb4}; - - DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic"); - -@@ -100,8 +160,8 @@ size_t ZSTD_compressBlock_doubleFast_noD - U32 const current = (U32)(ip - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); - U32 const maxRep = current - windowLow; -- if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; -- if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; -+ if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0; -+ if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0; - } - - /* Outer Loop: one iteration per match found and stored */ -@@ -131,30 +191,35 @@ size_t ZSTD_compressBlock_doubleFast_noD - if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { - mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; - ip++; -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); -+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); - goto _match_stored; - } - - hl1 = ZSTD_hashPtr(ip1, hBitsL, 8); - -- if (idxl0 > prefixLowestIndex) { -+ /* idxl0 > prefixLowestIndex is a (somewhat) unpredictable branch. -+ * However expression below complies into conditional move. Since -+ * match is unlikely and we only *branch* on idxl0 > prefixLowestIndex -+ * if there is a match, all branches become predictable. */ -+ { const BYTE* const matchl0_safe = ZSTD_selectAddr(idxl0, prefixLowestIndex, matchl0, &dummy[0]); -+ - /* check prefix long match */ -- if (MEM_read64(matchl0) == MEM_read64(ip)) { -+ if (MEM_read64(matchl0_safe) == MEM_read64(ip) && matchl0_safe == matchl0) { - mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8; - offset = (U32)(ip-matchl0); - while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */ - goto _match_found; -- } -- } -+ } } - - idxl1 = hashLong[hl1]; - matchl1 = base + idxl1; - -- if (idxs0 > prefixLowestIndex) { -- /* check prefix short match */ -- if (MEM_read32(matchs0) == MEM_read32(ip)) { -- goto _search_next_long; -- } -+ /* Same optimization as matchl0 above */ -+ matchs0_safe = ZSTD_selectAddr(idxs0, prefixLowestIndex, matchs0, &dummy[0]); -+ -+ /* check prefix short match */ -+ if(MEM_read32(matchs0_safe) == MEM_read32(ip) && matchs0_safe == matchs0) { -+ goto _search_next_long; - } - - if (ip1 >= nextStep) { -@@ -175,30 +240,36 @@ size_t ZSTD_compressBlock_doubleFast_noD - } while (ip1 <= ilimit); - - _cleanup: -+ /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0), -+ * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */ -+ offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2; -+ - /* save reps for next block */ -- rep[0] = offset_1 ? offset_1 : offsetSaved; -- rep[1] = offset_2 ? offset_2 : offsetSaved; -+ rep[0] = offset_1 ? offset_1 : offsetSaved1; -+ rep[1] = offset_2 ? 
offset_2 : offsetSaved2; - - /* Return the last literals size */ - return (size_t)(iend - anchor); - - _search_next_long: - -- /* check prefix long +1 match */ -- if (idxl1 > prefixLowestIndex) { -- if (MEM_read64(matchl1) == MEM_read64(ip1)) { -+ /* short match found: let's check for a longer one */ -+ mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4; -+ offset = (U32)(ip - matchs0); -+ -+ /* check long match at +1 position */ -+ if ((idxl1 > prefixLowestIndex) && (MEM_read64(matchl1) == MEM_read64(ip1))) { -+ size_t const l1len = ZSTD_count(ip1+8, matchl1+8, iend) + 8; -+ if (l1len > mLength) { -+ /* use the long match instead */ - ip = ip1; -- mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8; -+ mLength = l1len; - offset = (U32)(ip-matchl1); -- while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */ -- goto _match_found; -+ matchs0 = matchl1; - } - } - -- /* if no long +1 match, explore the short match we found */ -- mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4; -- offset = (U32)(ip - matchs0); -- while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ -+ while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* complete backward */ - - /* fall-through */ - -@@ -217,7 +288,7 @@ _match_found: /* requires ip, offset, mL - hashLong[hl1] = (U32)(ip1 - base); - } - -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); -+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); - - _match_stored: - /* match found */ -@@ -243,7 +314,7 @@ _match_stored: - U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); -- ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength); -+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength); - ip += rLength; - anchor = ip; - continue; /* faster when present ... (?) 
*/ -@@ -254,8 +325,9 @@ _match_stored: - - - FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, - U32 const mls /* template */) - { -@@ -275,9 +347,8 @@ size_t ZSTD_compressBlock_doubleFast_dic - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=rep[0], offset_2=rep[1]; -- U32 offsetSaved = 0; - -- const ZSTD_matchState_t* const dms = ms->dictMatchState; -+ const ZSTD_MatchState_t* const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dictCParams = &dms->cParams; - const U32* const dictHashLong = dms->hashTable; - const U32* const dictHashSmall = dms->chainTable; -@@ -286,8 +357,8 @@ size_t ZSTD_compressBlock_doubleFast_dic - const BYTE* const dictStart = dictBase + dictStartIndex; - const BYTE* const dictEnd = dms->window.nextSrc; - const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase); -- const U32 dictHBitsL = dictCParams->hashLog; -- const U32 dictHBitsS = dictCParams->chainLog; -+ const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; -+ const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS; - const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); - - DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic"); -@@ -295,6 +366,13 @@ size_t ZSTD_compressBlock_doubleFast_dic - /* if a dictionary is attached, it must be within window range */ - assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); - -+ if (ms->prefetchCDictTables) { -+ size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32); -+ size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32); -+ PREFETCH_AREA(dictHashLong, hashTableBytes); -+ PREFETCH_AREA(dictHashSmall, chainTableBytes); -+ } -+ - /* init */ - ip += (dictAndPrefixLength == 0); - -@@ -309,8 +387,12 @@ size_t ZSTD_compressBlock_doubleFast_dic - U32 offset; - size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); - size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); -- size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); -- size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); -+ size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8); -+ size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls); -+ U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS]; -+ U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS]; -+ int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL); -+ int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS); - U32 const curr = (U32)(ip-base); - U32 const matchIndexL = hashLong[h2]; - U32 matchIndexS = hashSmall[h]; -@@ -323,26 +405,24 @@ size_t ZSTD_compressBlock_doubleFast_dic - hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ - - /* check repcode */ -- if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) -+ if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex)) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; - ip++; -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); -+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); - goto _match_stored; - } - -- if (matchIndexL > prefixLowestIndex) { -+ if ((matchIndexL >= prefixLowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { - /* check prefix long match */ -- if (MEM_read64(matchLong) == MEM_read64(ip)) { -- mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; -- offset = (U32)(ip-matchLong); -- while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ -- goto _match_found; -- } -- } else { -+ mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; -+ offset = (U32)(ip-matchLong); -+ while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ -+ goto _match_found; -+ } else if (dictTagsMatchL) { - /* check dictMatchState long match */ -- U32 const dictMatchIndexL = dictHashLong[dictHL]; -+ U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS; - const BYTE* dictMatchL = dictBase + dictMatchIndexL; - assert(dictMatchL < dictEnd); - -@@ -354,13 +434,13 @@ size_t ZSTD_compressBlock_doubleFast_dic - } } - - if (matchIndexS > prefixLowestIndex) { -- /* check prefix short match */ -+ /* short match candidate */ - if (MEM_read32(match) == MEM_read32(ip)) { - goto _search_next_long; - } -- } else { -+ } else if (dictTagsMatchS) { - /* check dictMatchState short match */ -- U32 const dictMatchIndexS = dictHashSmall[dictHS]; -+ U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS; - match = dictBase + dictMatchIndexS; - matchIndexS = dictMatchIndexS + dictIndexDelta; - -@@ -375,25 +455,24 @@ size_t ZSTD_compressBlock_doubleFast_dic - continue; - - _search_next_long: -- - { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); -- size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); -+ size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8); - U32 const matchIndexL3 = hashLong[hl3]; -+ U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS]; -+ int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3); - const BYTE* matchL3 = base + matchIndexL3; - hashLong[hl3] = curr + 1; - - /* check prefix long +1 match */ -- if (matchIndexL3 > prefixLowestIndex) { -- if (MEM_read64(matchL3) == MEM_read64(ip+1)) { -- mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; -- ip++; -- offset = (U32)(ip-matchL3); -- while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ -- goto _match_found; -- } -- } else { -+ if ((matchIndexL3 >= prefixLowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1))) { -+ mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; -+ ip++; -+ offset = (U32)(ip-matchL3); -+ while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ -+ goto _match_found; -+ } else if (dictTagsMatchL3) { - /* check dict long +1 match */ -- U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; -+ U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS; - const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; - assert(dictMatchL3 < dictEnd); - if (dictMatchL3 > dictStart && 
MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { -@@ -419,7 +498,7 @@ _match_found: - offset_2 = offset_1; - offset_1 = offset; - -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); -+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); - - _match_stored: - /* match found */ -@@ -443,12 +522,12 @@ _match_stored: - const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ? - dictBase + repIndex2 - dictIndexDelta : - base + repIndex2; -- if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) -+ if ( (ZSTD_index_overlap_check(prefixLowestIndex, repIndex2)) - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ -- ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); -+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; - ip += repLength2; -@@ -461,8 +540,8 @@ _match_stored: - } /* while (ip < ilimit) */ - - /* save reps for next block */ -- rep[0] = offset_1 ? offset_1 : offsetSaved; -- rep[1] = offset_2 ? offset_2 : offsetSaved; -+ rep[0] = offset_1; -+ rep[1] = offset_2; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -@@ -470,7 +549,7 @@ _match_stored: - - #define ZSTD_GEN_DFAST_FN(dictMode, mls) \ - static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \ -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ - void const* src, size_t srcSize) \ - { \ - return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \ -@@ -488,7 +567,7 @@ ZSTD_GEN_DFAST_FN(dictMatchState, 7) - - - size_t ZSTD_compressBlock_doubleFast( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { - const U32 mls = ms->cParams.minMatch; -@@ -508,7 +587,7 @@ size_t ZSTD_compressBlock_doubleFast( - - - size_t ZSTD_compressBlock_doubleFast_dictMatchState( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { - const U32 mls = ms->cParams.minMatch; -@@ -527,8 +606,10 @@ size_t ZSTD_compressBlock_doubleFast_dic - } - - --static size_t ZSTD_compressBlock_doubleFast_extDict_generic( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t ZSTD_compressBlock_doubleFast_extDict_generic( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, - U32 const mls /* template */) - { -@@ -579,13 +660,13 @@ static size_t ZSTD_compressBlock_doubleF - size_t mLength; - hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ - -- if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ -+ if (((ZSTD_index_overlap_check(prefixStartIndex, repIndex)) - & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are 
searching at curr+1 */ - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; - ip++; -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); -+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); - } else { - if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { - const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; -@@ -596,7 +677,7 @@ static size_t ZSTD_compressBlock_doubleF - while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); -+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); - - } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { - size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); -@@ -621,7 +702,7 @@ static size_t ZSTD_compressBlock_doubleF - } - offset_2 = offset_1; - offset_1 = offset; -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); -+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); - - } else { - ip += ((ip-anchor) >> kSearchStrength) + 1; -@@ -647,13 +728,13 @@ static size_t ZSTD_compressBlock_doubleF - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; -- if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ -+ if ( ((ZSTD_index_overlap_check(prefixStartIndex, repIndex2)) - & (offset_2 <= current2 - dictStartIndex)) - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ -- ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); -+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; - ip += repLength2; -@@ -677,7 +758,7 @@ ZSTD_GEN_DFAST_FN(extDict, 6) - ZSTD_GEN_DFAST_FN(extDict, 7) - - size_t ZSTD_compressBlock_doubleFast_extDict( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { - U32 const mls = ms->cParams.minMatch; -@@ -694,3 +775,5 @@ size_t ZSTD_compressBlock_doubleFast_ext - return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize); - } - } -+ -+#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */ ---- a/lib/zstd/compress/zstd_double_fast.h -+++ b/lib/zstd/compress/zstd_double_fast.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -11,22 +12,32 @@ - #ifndef ZSTD_DOUBLE_FAST_H - #define ZSTD_DOUBLE_FAST_H - -- - #include "../common/mem.h" /* U32 */ - #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ - --void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, -- void const* end, ZSTD_dictTableLoadMethod_e dtlm); -+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR -+ -+void ZSTD_fillDoubleHashTable(ZSTD_MatchState_t* ms, -+ void const* end, ZSTD_dictTableLoadMethod_e dtlm, -+ ZSTD_tableFillPurpose_e tfp); -+ - size_t ZSTD_compressBlock_doubleFast( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - size_t ZSTD_compressBlock_doubleFast_dictMatchState( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - size_t ZSTD_compressBlock_doubleFast_extDict( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -- -+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST ZSTD_compressBlock_doubleFast -+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE ZSTD_compressBlock_doubleFast_dictMatchState -+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT ZSTD_compressBlock_doubleFast_extDict -+#else -+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST NULL -+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE NULL -+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT NULL -+#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */ - - #endif /* ZSTD_DOUBLE_FAST_H */ ---- a/lib/zstd/compress/zstd_fast.c -+++ b/lib/zstd/compress/zstd_fast.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -11,8 +12,46 @@ - #include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */ - #include "zstd_fast.h" - -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_fillHashTableForCDict(ZSTD_MatchState_t* ms, -+ const void* const end, -+ ZSTD_dictTableLoadMethod_e dtlm) -+{ -+ const ZSTD_compressionParameters* const cParams = &ms->cParams; -+ U32* const hashTable = ms->hashTable; -+ U32 const hBits = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; -+ U32 const mls = cParams->minMatch; -+ const BYTE* const base = ms->window.base; -+ const BYTE* ip = base + ms->nextToUpdate; -+ const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; -+ const U32 fastHashFillStep = 3; -+ -+ /* Currently, we always use ZSTD_dtlm_full for filling CDict tables. -+ * Feel free to remove this assert if there's a good reason! */ -+ assert(dtlm == ZSTD_dtlm_full); -+ -+ /* Always insert every fastHashFillStep position into the hash table. -+ * Insert the other positions if their hash entry is empty. 
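/*
 * Illustrative sketch of the tagged-index ("short cache") packing that the
 * CDict table fills above rely on. Helper names are hypothetical stand-ins
 * for ZSTD_writeTaggedIndex() and ZSTD_comparePackedTags(), and the 8-bit
 * tag width is an assumption about ZSTD_SHORT_CACHE_TAG_BITS. The hash is
 * computed with hashLog + TAG_BITS bits: the high bits select the table
 * slot, the low bits become a tag stored next to the index, so a lookup can
 * reject most non-matching candidates without touching dictionary content.
 */
#include <stddef.h>
#include <stdint.h>

#define TAG_BITS 8u                       /* assumed tag width */
#define TAG_MASK ((1u << TAG_BITS) - 1)

static void write_tagged_index(uint32_t* table, size_t hashAndTag, uint32_t index)
{
    /* index must fit in 32 - TAG_BITS bits for the packing to be lossless */
    table[hashAndTag >> TAG_BITS] =
        (index << TAG_BITS) | (uint32_t)(hashAndTag & TAG_MASK);
}

static int tags_match(uint32_t packedEntry, size_t hashAndTag)
{
    return (packedEntry & TAG_MASK) == (uint32_t)(hashAndTag & TAG_MASK);
}

static uint32_t unpack_index(uint32_t packedEntry)
{
    return packedEntry >> TAG_BITS;
}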
-+ */ -+ for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { -+ U32 const curr = (U32)(ip - base); -+ { size_t const hashAndTag = ZSTD_hashPtr(ip, hBits, mls); -+ ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr); } -+ -+ if (dtlm == ZSTD_dtlm_fast) continue; -+ /* Only load extra positions for ZSTD_dtlm_full */ -+ { U32 p; -+ for (p = 1; p < fastHashFillStep; ++p) { -+ size_t const hashAndTag = ZSTD_hashPtr(ip + p, hBits, mls); -+ if (hashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) { /* not yet filled */ -+ ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr + p); -+ } } } } -+} - --void ZSTD_fillHashTable(ZSTD_matchState_t* ms, -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_fillHashTableForCCtx(ZSTD_MatchState_t* ms, - const void* const end, - ZSTD_dictTableLoadMethod_e dtlm) - { -@@ -25,6 +64,10 @@ void ZSTD_fillHashTable(ZSTD_matchState_ - const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; - const U32 fastHashFillStep = 3; - -+ /* Currently, we always use ZSTD_dtlm_fast for filling CCtx tables. -+ * Feel free to remove this assert if there's a good reason! */ -+ assert(dtlm == ZSTD_dtlm_fast); -+ - /* Always insert every fastHashFillStep position into the hash table. - * Insert the other positions if their hash entry is empty. - */ -@@ -42,6 +85,60 @@ void ZSTD_fillHashTable(ZSTD_matchState_ - } } } } - } - -+void ZSTD_fillHashTable(ZSTD_MatchState_t* ms, -+ const void* const end, -+ ZSTD_dictTableLoadMethod_e dtlm, -+ ZSTD_tableFillPurpose_e tfp) -+{ -+ if (tfp == ZSTD_tfp_forCDict) { -+ ZSTD_fillHashTableForCDict(ms, end, dtlm); -+ } else { -+ ZSTD_fillHashTableForCCtx(ms, end, dtlm); -+ } -+} -+ -+ -+typedef int (*ZSTD_match4Found) (const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit); -+ -+static int -+ZSTD_match4Found_cmov(const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit) -+{ -+ /* Array of ~random data, should have low probability of matching data. -+ * Load from here if the index is invalid. -+ * Used to avoid unpredictable branches. */ -+ static const BYTE dummy[] = {0x12,0x34,0x56,0x78}; -+ -+ /* currentIdx >= lowLimit is a (somewhat) unpredictable branch. -+ * However expression below compiles into conditional move. -+ */ -+ const BYTE* mvalAddr = ZSTD_selectAddr(matchIdx, idxLowLimit, matchAddress, dummy); -+ /* Note: this used to be written as : return test1 && test2; -+ * Unfortunately, once inlined, these tests become branches, -+ * in which case it becomes critical that they are executed in the right order (test1 then test2). -+ * So we have to write these tests in a specific manner to ensure their ordering. -+ */ -+ if (MEM_read32(currentPtr) != MEM_read32(mvalAddr)) return 0; -+ /* force ordering of these tests, which matters once the function is inlined, as they become branches */ -+ __asm__(""); -+ return matchIdx >= idxLowLimit; -+} -+ -+static int -+ZSTD_match4Found_branch(const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit) -+{ -+ /* using a branch instead of a cmov, -+ * because it's faster in scenarios where matchIdx >= idxLowLimit is generally true, -+ * aka almost all candidates are within range */ -+ U32 mval; -+ if (matchIdx >= idxLowLimit) { -+ mval = MEM_read32(matchAddress); -+ } else { -+ mval = MEM_read32(currentPtr) ^ 1; /* guaranteed to not match. 
*/ -+ } -+ -+ return (MEM_read32(currentPtr) == mval); -+} -+ - - /* - * If you squint hard enough (and ignore repcodes), the search operation at any -@@ -89,17 +186,17 @@ void ZSTD_fillHashTable(ZSTD_matchState_ - * - * This is also the work we do at the beginning to enter the loop initially. - */ --FORCE_INLINE_TEMPLATE size_t --ZSTD_compressBlock_fast_noDict_generic( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t ZSTD_compressBlock_fast_noDict_generic( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, -- U32 const mls, U32 const hasStep) -+ U32 const mls, int useCmov) - { - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hlog = cParams->hashLog; -- /* support stepSize of 0 */ -- size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2; -+ size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; /* min 2 */ - const BYTE* const base = ms->window.base; - const BYTE* const istart = (const BYTE*)src; - const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); -@@ -117,12 +214,11 @@ ZSTD_compressBlock_fast_noDict_generic( - - U32 rep_offset1 = rep[0]; - U32 rep_offset2 = rep[1]; -- U32 offsetSaved = 0; -+ U32 offsetSaved1 = 0, offsetSaved2 = 0; - - size_t hash0; /* hash for ip0 */ - size_t hash1; /* hash for ip1 */ -- U32 idx; /* match idx for ip0 */ -- U32 mval; /* src value at match idx */ -+ U32 matchIdx; /* match idx for ip0 */ - - U32 offcode; - const BYTE* match0; -@@ -135,14 +231,15 @@ ZSTD_compressBlock_fast_noDict_generic( - size_t step; - const BYTE* nextStep; - const size_t kStepIncr = (1 << (kSearchStrength - 1)); -+ const ZSTD_match4Found matchFound = useCmov ? ZSTD_match4Found_cmov : ZSTD_match4Found_branch; - - DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); - ip0 += (ip0 == prefixStart); - { U32 const curr = (U32)(ip0 - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); - U32 const maxRep = curr - windowLow; -- if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0; -- if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0; -+ if (rep_offset2 > maxRep) offsetSaved2 = rep_offset2, rep_offset2 = 0; -+ if (rep_offset1 > maxRep) offsetSaved1 = rep_offset1, rep_offset1 = 0; - } - - /* start each op */ -@@ -163,7 +260,7 @@ _start: /* Requires: ip0 */ - hash0 = ZSTD_hashPtr(ip0, hlog, mls); - hash1 = ZSTD_hashPtr(ip1, hlog, mls); - -- idx = hashTable[hash0]; -+ matchIdx = hashTable[hash0]; - - do { - /* load repcode match for ip[2]*/ -@@ -180,26 +277,28 @@ _start: /* Requires: ip0 */ - mLength = ip0[-1] == match0[-1]; - ip0 -= mLength; - match0 -= mLength; -- offcode = STORE_REPCODE_1; -+ offcode = REPCODE1_TO_OFFBASE; - mLength += 4; -+ -+ /* Write next hash table entry: it's already calculated. -+ * This write is known to be safe because ip1 is before the -+ * repcode (ip2). */ -+ hashTable[hash1] = (U32)(ip1 - base); -+ - goto _match; - } - -- /* load match for ip[0] */ -- if (idx >= prefixStartIndex) { -- mval = MEM_read32(base + idx); -- } else { -- mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */ -- } -+ if (matchFound(ip0, base + matchIdx, matchIdx, prefixStartIndex)) { -+ /* Write next hash table entry (it's already calculated). 
-+ * This write is known to be safe because the ip1 == ip0 + 1, -+ * so searching will resume after ip1 */ -+ hashTable[hash1] = (U32)(ip1 - base); - -- /* check match at ip[0] */ -- if (MEM_read32(ip0) == mval) { -- /* found a match! */ - goto _offset; - } - - /* lookup ip[1] */ -- idx = hashTable[hash1]; -+ matchIdx = hashTable[hash1]; - - /* hash ip[2] */ - hash0 = hash1; -@@ -214,21 +313,19 @@ _start: /* Requires: ip0 */ - current0 = (U32)(ip0 - base); - hashTable[hash0] = current0; - -- /* load match for ip[0] */ -- if (idx >= prefixStartIndex) { -- mval = MEM_read32(base + idx); -- } else { -- mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */ -- } -- -- /* check match at ip[0] */ -- if (MEM_read32(ip0) == mval) { -- /* found a match! */ -+ if (matchFound(ip0, base + matchIdx, matchIdx, prefixStartIndex)) { -+ /* Write next hash table entry, since it's already calculated */ -+ if (step <= 4) { -+ /* Avoid writing an index if it's >= position where search will resume. -+ * The minimum possible match has length 4, so search can resume at ip0 + 4. -+ */ -+ hashTable[hash1] = (U32)(ip1 - base); -+ } - goto _offset; - } - - /* lookup ip[1] */ -- idx = hashTable[hash1]; -+ matchIdx = hashTable[hash1]; - - /* hash ip[2] */ - hash0 = hash1; -@@ -250,13 +347,28 @@ _start: /* Requires: ip0 */ - } while (ip3 < ilimit); - - _cleanup: -- /* Note that there are probably still a couple positions we could search. -+ /* Note that there are probably still a couple positions one could search. - * However, it seems to be a meaningful performance hit to try to search - * them. So let's not. */ - -+ /* When the repcodes are outside of the prefix, we set them to zero before the loop. -+ * When the offsets are still zero, we need to restore them after the block to have a correct -+ * repcode history. If only one offset was invalid, it is easy. The tricky case is when both -+ * offsets were invalid. We need to figure out which offset to refill with. -+ * - If both offsets are zero they are in the same order. -+ * - If both offsets are non-zero, we won't restore the offsets from `offsetSaved[12]`. -+ * - If only one is zero, we need to decide which offset to restore. -+ * - If rep_offset1 is non-zero, then rep_offset2 must be offsetSaved1. -+ * - It is impossible for rep_offset2 to be non-zero. -+ * -+ * So if rep_offset1 started invalid (offsetSaved1 != 0) and became valid (rep_offset1 != 0), then -+ * set rep[0] = rep_offset1 and rep[1] = offsetSaved1. -+ */ -+ offsetSaved2 = ((offsetSaved1 != 0) && (rep_offset1 != 0)) ? offsetSaved1 : offsetSaved2; -+ - /* save reps for next block */ -- rep[0] = rep_offset1 ? rep_offset1 : offsetSaved; -- rep[1] = rep_offset2 ? rep_offset2 : offsetSaved; -+ rep[0] = rep_offset1 ? rep_offset1 : offsetSaved1; -+ rep[1] = rep_offset2 ? rep_offset2 : offsetSaved2; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -@@ -264,10 +376,10 @@ _cleanup: - _offset: /* Requires: ip0, idx */ - - /* Compute the offset code. */ -- match0 = base + idx; -+ match0 = base + matchIdx; - rep_offset2 = rep_offset1; - rep_offset1 = (U32)(ip0-match0); -- offcode = STORE_OFFSET(rep_offset1); -+ offcode = OFFSET_TO_OFFBASE(rep_offset1); - mLength = 4; - - /* Count the backwards match length. */ -@@ -287,11 +399,6 @@ _match: /* Requires: ip0, match0, offcod - ip0 += mLength; - anchor = ip0; - -- /* write next hash table entry */ -- if (ip1 < ip0) { -- hashTable[hash1] = (U32)(ip1 - base); -- } -- - /* Fill table and check for immediate repcode. 
*/ - if (ip0 <= ilimit) { - /* Fill Table */ -@@ -306,7 +413,7 @@ _match: /* Requires: ip0, match0, offcod - { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */ - hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); - ip0 += rLength; -- ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength); -+ ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, REPCODE1_TO_OFFBASE, rLength); - anchor = ip0; - continue; /* faster when present (confirmed on gcc-8) ... (?) */ - } } } -@@ -314,12 +421,12 @@ _match: /* Requires: ip0, match0, offcod - goto _start; - } - --#define ZSTD_GEN_FAST_FN(dictMode, mls, step) \ -- static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step( \ -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ -+#define ZSTD_GEN_FAST_FN(dictMode, mml, cmov) \ -+ static size_t ZSTD_compressBlock_fast_##dictMode##_##mml##_##cmov( \ -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ - void const* src, size_t srcSize) \ - { \ -- return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \ -+ return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mml, cmov); \ - } - - ZSTD_GEN_FAST_FN(noDict, 4, 1) -@@ -333,13 +440,15 @@ ZSTD_GEN_FAST_FN(noDict, 6, 0) - ZSTD_GEN_FAST_FN(noDict, 7, 0) - - size_t ZSTD_compressBlock_fast( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- U32 const mls = ms->cParams.minMatch; -+ U32 const mml = ms->cParams.minMatch; -+ /* use cmov when "candidate in range" branch is likely unpredictable */ -+ int const useCmov = ms->cParams.windowLog < 19; - assert(ms->dictMatchState == NULL); -- if (ms->cParams.targetLength > 1) { -- switch(mls) -+ if (useCmov) { -+ switch(mml) - { - default: /* includes case 3 */ - case 4 : -@@ -352,7 +461,8 @@ size_t ZSTD_compressBlock_fast( - return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize); - } - } else { -- switch(mls) -+ /* use a branch instead */ -+ switch(mml) - { - default: /* includes case 3 */ - case 4 : -@@ -364,13 +474,13 @@ size_t ZSTD_compressBlock_fast( - case 7 : - return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize); - } -- - } - } - - FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_compressBlock_fast_dictMatchState_generic( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, U32 const mls, U32 const hasStep) - { - const ZSTD_compressionParameters* const cParams = &ms->cParams; -@@ -380,16 +490,16 @@ size_t ZSTD_compressBlock_fast_dictMatch - U32 const stepSize = cParams->targetLength + !(cParams->targetLength); - const BYTE* const base = ms->window.base; - const BYTE* const istart = (const BYTE*)src; -- const BYTE* ip = istart; -+ const BYTE* ip0 = istart; -+ const BYTE* ip1 = ip0 + stepSize; /* we assert below that stepSize >= 1 */ - const BYTE* anchor = istart; - const U32 prefixStartIndex = ms->window.dictLimit; - const BYTE* const prefixStart = base + prefixStartIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=rep[0], offset_2=rep[1]; -- U32 offsetSaved = 0; - -- const ZSTD_matchState_t* const dms = 
ms->dictMatchState; -+ const ZSTD_MatchState_t* const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; - const U32* const dictHashTable = dms->hashTable; - const U32 dictStartIndex = dms->window.dictLimit; -@@ -397,13 +507,13 @@ size_t ZSTD_compressBlock_fast_dictMatch - const BYTE* const dictStart = dictBase + dictStartIndex; - const BYTE* const dictEnd = dms->window.nextSrc; - const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); -- const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); -- const U32 dictHLog = dictCParams->hashLog; -+ const U32 dictAndPrefixLength = (U32)(istart - prefixStart + dictEnd - dictStart); -+ const U32 dictHBits = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; - - /* if a dictionary is still attached, it necessarily means that - * it is within window size. So we just check it. */ - const U32 maxDistance = 1U << cParams->windowLog; -- const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); -+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - assert(endIndex - prefixStartIndex <= maxDistance); - (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ - -@@ -413,106 +523,154 @@ size_t ZSTD_compressBlock_fast_dictMatch - * when translating a dict index into a local index */ - assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); - -+ if (ms->prefetchCDictTables) { -+ size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32); -+ PREFETCH_AREA(dictHashTable, hashTableBytes); -+ } -+ - /* init */ - DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); -- ip += (dictAndPrefixLength == 0); -+ ip0 += (dictAndPrefixLength == 0); - /* dictMatchState repCode checks don't currently handle repCode == 0 - * disabling. */ - assert(offset_1 <= dictAndPrefixLength); - assert(offset_2 <= dictAndPrefixLength); - -- /* Main Search Loop */ -- while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ -+ /* Outer search loop */ -+ assert(stepSize >= 1); -+ while (ip1 <= ilimit) { /* repcode check at (ip0 + 1) is safe because ip0 < ip1 */ - size_t mLength; -- size_t const h = ZSTD_hashPtr(ip, hlog, mls); -- U32 const curr = (U32)(ip-base); -- U32 const matchIndex = hashTable[h]; -- const BYTE* match = base + matchIndex; -- const U32 repIndex = curr + 1 - offset_1; -- const BYTE* repMatch = (repIndex < prefixStartIndex) ? -- dictBase + (repIndex - dictIndexDelta) : -- base + repIndex; -- hashTable[h] = curr; /* update hash table */ -- -- if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ -- && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { -- const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; -- mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; -- ip++; -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); -- } else if ( (matchIndex <= prefixStartIndex) ) { -- size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); -- U32 const dictMatchIndex = dictHashTable[dictHash]; -- const BYTE* dictMatch = dictBase + dictMatchIndex; -- if (dictMatchIndex <= dictStartIndex || -- MEM_read32(dictMatch) != MEM_read32(ip)) { -- assert(stepSize >= 1); -- ip += ((ip-anchor) >> kSearchStrength) + stepSize; -- continue; -- } else { -- /* found a dict match */ -- U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta); -- mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; -- while (((ip>anchor) & (dictMatch>dictStart)) -- && (ip[-1] == dictMatch[-1])) { -- ip--; dictMatch--; mLength++; -+ size_t hash0 = ZSTD_hashPtr(ip0, hlog, mls); -+ -+ size_t const dictHashAndTag0 = ZSTD_hashPtr(ip0, dictHBits, mls); -+ U32 dictMatchIndexAndTag = dictHashTable[dictHashAndTag0 >> ZSTD_SHORT_CACHE_TAG_BITS]; -+ int dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag0); -+ -+ U32 matchIndex = hashTable[hash0]; -+ U32 curr = (U32)(ip0 - base); -+ size_t step = stepSize; -+ const size_t kStepIncr = 1 << kSearchStrength; -+ const BYTE* nextStep = ip0 + kStepIncr; -+ -+ /* Inner search loop */ -+ while (1) { -+ const BYTE* match = base + matchIndex; -+ const U32 repIndex = curr + 1 - offset_1; -+ const BYTE* repMatch = (repIndex < prefixStartIndex) ? -+ dictBase + (repIndex - dictIndexDelta) : -+ base + repIndex; -+ const size_t hash1 = ZSTD_hashPtr(ip1, hlog, mls); -+ size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls); -+ hashTable[hash0] = curr; /* update hash table */ -+ -+ if ((ZSTD_index_overlap_check(prefixStartIndex, repIndex)) -+ && (MEM_read32(repMatch) == MEM_read32(ip0 + 1))) { -+ const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; -+ mLength = ZSTD_count_2segments(ip0 + 1 + 4, repMatch + 4, iend, repMatchEnd, prefixStart) + 4; -+ ip0++; -+ ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); -+ break; -+ } -+ -+ if (dictTagsMatch) { -+ /* Found a possible dict match */ -+ const U32 dictMatchIndex = dictMatchIndexAndTag >> ZSTD_SHORT_CACHE_TAG_BITS; -+ const BYTE* dictMatch = dictBase + dictMatchIndex; -+ if (dictMatchIndex > dictStartIndex && -+ MEM_read32(dictMatch) == MEM_read32(ip0)) { -+ /* To replicate extDict parse behavior, we only use dict matches when the normal matchIndex is invalid */ -+ if (matchIndex <= prefixStartIndex) { -+ U32 const offset = (U32) (curr - dictMatchIndex - dictIndexDelta); -+ mLength = ZSTD_count_2segments(ip0 + 4, dictMatch + 4, iend, dictEnd, prefixStart) + 4; -+ while (((ip0 > anchor) & (dictMatch > dictStart)) -+ && (ip0[-1] == dictMatch[-1])) { -+ ip0--; -+ dictMatch--; -+ mLength++; -+ } /* catch up */ -+ offset_2 = offset_1; -+ offset_1 = offset; -+ ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); -+ break; -+ } -+ } -+ } -+ -+ if (ZSTD_match4Found_cmov(ip0, match, matchIndex, prefixStartIndex)) { -+ /* found a regular match of size >= 4 */ -+ U32 const offset = (U32) (ip0 - match); -+ mLength = ZSTD_count(ip0 + 4, match + 4, iend) + 4; -+ while (((ip0 > anchor) & (match > prefixStart)) -+ && (ip0[-1] == match[-1])) { -+ ip0--; -+ match--; -+ mLength++; - } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); -+ ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); -+ break; - } -- } else if (MEM_read32(match) != MEM_read32(ip)) { -- /* it's not a match, and we're not going to check the dictionary */ -- assert(stepSize >= 1); -- ip += ((ip-anchor) >> kSearchStrength) + stepSize; -- continue; -- } else { -- /* found a regular match */ -- U32 const offset = (U32)(ip-match); -- mLength = ZSTD_count(ip+4, match+4, iend) + 4; -- while (((ip>anchor) & (match>prefixStart)) -- && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ -- offset_2 = offset_1; -- offset_1 = offset; -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); -- } -+ -+ /* Prepare for next iteration */ -+ dictMatchIndexAndTag = dictHashTable[dictHashAndTag1 >> ZSTD_SHORT_CACHE_TAG_BITS]; -+ dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag1); -+ matchIndex = hashTable[hash1]; -+ -+ if (ip1 >= nextStep) { -+ step++; -+ nextStep += kStepIncr; -+ } -+ ip0 = ip1; -+ ip1 = ip1 + step; -+ if (ip1 > ilimit) goto _cleanup; -+ -+ curr = (U32)(ip0 - base); -+ hash0 = hash1; -+ } /* end inner search loop */ - - /* match found */ -- ip += mLength; -- anchor = ip; -+ assert(mLength); -+ ip0 += mLength; -+ anchor = ip0; - -- if (ip <= ilimit) { -+ if (ip0 <= ilimit) { - /* Fill Table */ - assert(base+curr+2 > istart); /* check base overflow */ - hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */ -- hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); -+ hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); - - /* check immediate repcode */ -- while (ip <= ilimit) { -- U32 const current2 = (U32)(ip-base); -+ while (ip0 <= ilimit) { -+ U32 const current2 = (U32)(ip0-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* 
repMatch2 = repIndex2 < prefixStartIndex ? - dictBase - dictIndexDelta + repIndex2 : - base + repIndex2; -- if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) -- && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { -+ if ( (ZSTD_index_overlap_check(prefixStartIndex, repIndex2)) -+ && (MEM_read32(repMatch2) == MEM_read32(ip0))) { - const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; -- size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; -+ size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ -- ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); -- hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; -- ip += repLength2; -- anchor = ip; -+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); -+ hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = current2; -+ ip0 += repLength2; -+ anchor = ip0; - continue; - } - break; - } - } -+ -+ /* Prepare for next iteration */ -+ assert(ip0 == anchor); -+ ip1 = ip0 + stepSize; - } - -+_cleanup: - /* save reps for next block */ -- rep[0] = offset_1 ? offset_1 : offsetSaved; -- rep[1] = offset_2 ? offset_2 : offsetSaved; -+ rep[0] = offset_1; -+ rep[1] = offset_2; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -@@ -525,7 +683,7 @@ ZSTD_GEN_FAST_FN(dictMatchState, 6, 0) - ZSTD_GEN_FAST_FN(dictMatchState, 7, 0) - - size_t ZSTD_compressBlock_fast_dictMatchState( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { - U32 const mls = ms->cParams.minMatch; -@@ -545,19 +703,20 @@ size_t ZSTD_compressBlock_fast_dictMatch - } - - --static size_t ZSTD_compressBlock_fast_extDict_generic( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t ZSTD_compressBlock_fast_extDict_generic( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, U32 const mls, U32 const hasStep) - { - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hlog = cParams->hashLog; - /* support stepSize of 0 */ -- U32 const stepSize = cParams->targetLength + !(cParams->targetLength); -+ size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; - const BYTE* const base = ms->window.base; - const BYTE* const dictBase = ms->window.dictBase; - const BYTE* const istart = (const BYTE*)src; -- const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); -@@ -570,6 +729,28 @@ static size_t ZSTD_compressBlock_fast_ex - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - U32 offset_1=rep[0], offset_2=rep[1]; -+ U32 offsetSaved1 = 0, offsetSaved2 = 0; -+ -+ const BYTE* ip0 = istart; -+ const BYTE* ip1; -+ const BYTE* ip2; -+ const BYTE* ip3; -+ U32 current0; -+ -+ -+ size_t hash0; /* hash for ip0 */ -+ size_t hash1; /* hash for ip1 */ -+ U32 idx; /* match idx for ip0 */ -+ const BYTE* idxBase; /* base pointer for idx */ -+ -+ U32 offcode; -+ const BYTE* match0; -+ size_t mLength; -+ const BYTE* matchEnd = 0; /* 
initialize to avoid warning, assert != 0 later */ -+ -+ size_t step; -+ const BYTE* nextStep; -+ const size_t kStepIncr = (1 << (kSearchStrength - 1)); - - (void)hasStep; /* not currently specialized on whether it's accelerated */ - -@@ -579,75 +760,202 @@ static size_t ZSTD_compressBlock_fast_ex - if (prefixStartIndex == dictStartIndex) - return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize); - -- /* Search Loop */ -- while (ip < ilimit) { /* < instead of <=, because (ip+1) */ -- const size_t h = ZSTD_hashPtr(ip, hlog, mls); -- const U32 matchIndex = hashTable[h]; -- const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; -- const BYTE* match = matchBase + matchIndex; -- const U32 curr = (U32)(ip-base); -- const U32 repIndex = curr + 1 - offset_1; -- const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; -- const BYTE* const repMatch = repBase + repIndex; -- hashTable[h] = curr; /* update hash table */ -- DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr); -- -- if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ -- & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */ -- && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { -- const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; -- size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; -- ip++; -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength); -- ip += rLength; -- anchor = ip; -- } else { -- if ( (matchIndex < dictStartIndex) || -- (MEM_read32(match) != MEM_read32(ip)) ) { -- assert(stepSize >= 1); -- ip += ((ip-anchor) >> kSearchStrength) + stepSize; -- continue; -+ { U32 const curr = (U32)(ip0 - base); -+ U32 const maxRep = curr - dictStartIndex; -+ if (offset_2 >= maxRep) offsetSaved2 = offset_2, offset_2 = 0; -+ if (offset_1 >= maxRep) offsetSaved1 = offset_1, offset_1 = 0; -+ } -+ -+ /* start each op */ -+_start: /* Requires: ip0 */ -+ -+ step = stepSize; -+ nextStep = ip0 + kStepIncr; -+ -+ /* calculate positions, ip0 - anchor == 0, so we skip step calc */ -+ ip1 = ip0 + 1; -+ ip2 = ip0 + step; -+ ip3 = ip2 + 1; -+ -+ if (ip3 >= ilimit) { -+ goto _cleanup; -+ } -+ -+ hash0 = ZSTD_hashPtr(ip0, hlog, mls); -+ hash1 = ZSTD_hashPtr(ip1, hlog, mls); -+ -+ idx = hashTable[hash0]; -+ idxBase = idx < prefixStartIndex ? dictBase : base; -+ -+ do { -+ { /* load repcode match for ip[2] */ -+ U32 const current2 = (U32)(ip2 - base); -+ U32 const repIndex = current2 - offset_1; -+ const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; -+ U32 rval; -+ if ( ((U32)(prefixStartIndex - repIndex) >= 4) /* intentional underflow */ -+ & (offset_1 > 0) ) { -+ rval = MEM_read32(repBase + repIndex); -+ } else { -+ rval = MEM_read32(ip2) ^ 1; /* guaranteed to not match. */ - } -- { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; -- const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? 
dictStart : prefixStart; -- U32 const offset = curr - matchIndex; -- size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; -- while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ -- offset_2 = offset_1; offset_1 = offset; /* update offset history */ -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); -- ip += mLength; -- anchor = ip; -+ -+ /* write back hash table entry */ -+ current0 = (U32)(ip0 - base); -+ hashTable[hash0] = current0; -+ -+ /* check repcode at ip[2] */ -+ if (MEM_read32(ip2) == rval) { -+ ip0 = ip2; -+ match0 = repBase + repIndex; -+ matchEnd = repIndex < prefixStartIndex ? dictEnd : iend; -+ assert((match0 != prefixStart) & (match0 != dictStart)); -+ mLength = ip0[-1] == match0[-1]; -+ ip0 -= mLength; -+ match0 -= mLength; -+ offcode = REPCODE1_TO_OFFBASE; -+ mLength += 4; -+ goto _match; - } } - -- if (ip <= ilimit) { -- /* Fill Table */ -- hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; -- hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); -- /* check immediate repcode */ -- while (ip <= ilimit) { -- U32 const current2 = (U32)(ip-base); -- U32 const repIndex2 = current2 - offset_2; -- const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; -- if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex)) /* intentional overflow */ -- && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { -- const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; -- size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; -- { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ -- ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2); -- hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; -- ip += repLength2; -- anchor = ip; -- continue; -- } -- break; -- } } } -+ { /* load match for ip[0] */ -+ U32 const mval = idx >= dictStartIndex ? -+ MEM_read32(idxBase + idx) : -+ MEM_read32(ip0) ^ 1; /* guaranteed not to match */ -+ -+ /* check match at ip[0] */ -+ if (MEM_read32(ip0) == mval) { -+ /* found a match! */ -+ goto _offset; -+ } } -+ -+ /* lookup ip[1] */ -+ idx = hashTable[hash1]; -+ idxBase = idx < prefixStartIndex ? dictBase : base; -+ -+ /* hash ip[2] */ -+ hash0 = hash1; -+ hash1 = ZSTD_hashPtr(ip2, hlog, mls); -+ -+ /* advance to next positions */ -+ ip0 = ip1; -+ ip1 = ip2; -+ ip2 = ip3; -+ -+ /* write back hash table entry */ -+ current0 = (U32)(ip0 - base); -+ hashTable[hash0] = current0; -+ -+ { /* load match for ip[0] */ -+ U32 const mval = idx >= dictStartIndex ? -+ MEM_read32(idxBase + idx) : -+ MEM_read32(ip0) ^ 1; /* guaranteed not to match */ -+ -+ /* check match at ip[0] */ -+ if (MEM_read32(ip0) == mval) { -+ /* found a match! */ -+ goto _offset; -+ } } -+ -+ /* lookup ip[1] */ -+ idx = hashTable[hash1]; -+ idxBase = idx < prefixStartIndex ? 
dictBase : base; -+ -+ /* hash ip[2] */ -+ hash0 = hash1; -+ hash1 = ZSTD_hashPtr(ip2, hlog, mls); -+ -+ /* advance to next positions */ -+ ip0 = ip1; -+ ip1 = ip2; -+ ip2 = ip0 + step; -+ ip3 = ip1 + step; -+ -+ /* calculate step */ -+ if (ip2 >= nextStep) { -+ step++; -+ PREFETCH_L1(ip1 + 64); -+ PREFETCH_L1(ip1 + 128); -+ nextStep += kStepIncr; -+ } -+ } while (ip3 < ilimit); -+ -+_cleanup: -+ /* Note that there are probably still a couple positions we could search. -+ * However, it seems to be a meaningful performance hit to try to search -+ * them. So let's not. */ -+ -+ /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0), -+ * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */ -+ offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2; - - /* save reps for next block */ -- rep[0] = offset_1; -- rep[1] = offset_2; -+ rep[0] = offset_1 ? offset_1 : offsetSaved1; -+ rep[1] = offset_2 ? offset_2 : offsetSaved2; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -+ -+_offset: /* Requires: ip0, idx, idxBase */ -+ -+ /* Compute the offset code. */ -+ { U32 const offset = current0 - idx; -+ const BYTE* const lowMatchPtr = idx < prefixStartIndex ? dictStart : prefixStart; -+ matchEnd = idx < prefixStartIndex ? dictEnd : iend; -+ match0 = idxBase + idx; -+ offset_2 = offset_1; -+ offset_1 = offset; -+ offcode = OFFSET_TO_OFFBASE(offset); -+ mLength = 4; -+ -+ /* Count the backwards match length. */ -+ while (((ip0>anchor) & (match0>lowMatchPtr)) && (ip0[-1] == match0[-1])) { -+ ip0--; -+ match0--; -+ mLength++; -+ } } -+ -+_match: /* Requires: ip0, match0, offcode, matchEnd */ -+ -+ /* Count the forward length. */ -+ assert(matchEnd != 0); -+ mLength += ZSTD_count_2segments(ip0 + mLength, match0 + mLength, iend, matchEnd, prefixStart); -+ -+ ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength); -+ -+ ip0 += mLength; -+ anchor = ip0; -+ -+ /* write next hash table entry */ -+ if (ip1 < ip0) { -+ hashTable[hash1] = (U32)(ip1 - base); -+ } -+ -+ /* Fill table and check for immediate repcode. */ -+ if (ip0 <= ilimit) { -+ /* Fill Table */ -+ assert(base+current0+2 > istart); /* check base overflow */ -+ hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ -+ hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); -+ -+ while (ip0 <= ilimit) { -+ U32 const repIndex2 = (U32)(ip0-base) - offset_2; -+ const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; -+ if ( ((ZSTD_index_overlap_check(prefixStartIndex, repIndex2)) & (offset_2 > 0)) -+ && (MEM_read32(repMatch2) == MEM_read32(ip0)) ) { -+ const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; -+ size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; -+ { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ -+ ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); -+ hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); -+ ip0 += repLength2; -+ anchor = ip0; -+ continue; -+ } -+ break; -+ } } -+ -+ goto _start; - } - - ZSTD_GEN_FAST_FN(extDict, 4, 0) -@@ -656,10 +964,11 @@ ZSTD_GEN_FAST_FN(extDict, 6, 0) - ZSTD_GEN_FAST_FN(extDict, 7, 0) - - size_t ZSTD_compressBlock_fast_extDict( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { - U32 const mls = ms->cParams.minMatch; -+ assert(ms->dictMatchState == NULL); - switch(mls) - { - default: /* includes case 3 */ ---- a/lib/zstd/compress/zstd_fast.h -+++ b/lib/zstd/compress/zstd_fast.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -11,21 +12,20 @@ - #ifndef ZSTD_FAST_H - #define ZSTD_FAST_H - -- - #include "../common/mem.h" /* U32 */ - #include "zstd_compress_internal.h" - --void ZSTD_fillHashTable(ZSTD_matchState_t* ms, -- void const* end, ZSTD_dictTableLoadMethod_e dtlm); -+void ZSTD_fillHashTable(ZSTD_MatchState_t* ms, -+ void const* end, ZSTD_dictTableLoadMethod_e dtlm, -+ ZSTD_tableFillPurpose_e tfp); - size_t ZSTD_compressBlock_fast( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - size_t ZSTD_compressBlock_fast_dictMatchState( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - size_t ZSTD_compressBlock_fast_extDict( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -- - #endif /* ZSTD_FAST_H */ ---- a/lib/zstd/compress/zstd_lazy.c -+++ b/lib/zstd/compress/zstd_lazy.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -10,14 +11,23 @@ - - #include "zstd_compress_internal.h" - #include "zstd_lazy.h" -+#include "../common/bits.h" /* ZSTD_countTrailingZeros64 */ -+ -+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) -+ -+#define kLazySkippingStep 8 - - - /*-************************************* - * Binary Tree search - ***************************************/ - --static void --ZSTD_updateDUBT(ZSTD_matchState_t* ms, -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_updateDUBT(ZSTD_MatchState_t* ms, - const BYTE* ip, const BYTE* iend, - U32 mls) - { -@@ -60,8 +70,9 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms, - * sort one already inserted but unsorted position - * assumption : curr >= btlow == (curr - btmask) - * doesn't fail */ --static void --ZSTD_insertDUBT1(const ZSTD_matchState_t* ms, -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_insertDUBT1(const ZSTD_MatchState_t* ms, - U32 curr, const BYTE* inputEnd, - U32 nbCompares, U32 btLow, - const ZSTD_dictMode_e dictMode) -@@ -149,9 +160,10 @@ ZSTD_insertDUBT1(const ZSTD_matchState_t - } - - --static size_t --ZSTD_DUBT_findBetterDictMatch ( -- const ZSTD_matchState_t* ms, -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t ZSTD_DUBT_findBetterDictMatch ( -+ const ZSTD_MatchState_t* ms, - const BYTE* const ip, const BYTE* const iend, - size_t* offsetPtr, - size_t bestLength, -@@ -159,7 +171,7 @@ ZSTD_DUBT_findBetterDictMatch ( - U32 const mls, - const ZSTD_dictMode_e dictMode) - { -- const ZSTD_matchState_t * const dms = ms->dictMatchState; -+ const ZSTD_MatchState_t * const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dmsCParams = &dms->cParams; - const U32 * const dictHashTable = dms->hashTable; - U32 const hashLog = dmsCParams->hashLog; -@@ -197,8 +209,8 @@ ZSTD_DUBT_findBetterDictMatch ( - U32 matchIndex = dictMatchIndex + dictIndexDelta; - if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { - DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", -- curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, STORE_OFFSET(curr - matchIndex), dictMatchIndex, matchIndex); -- bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex); -+ curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, OFFSET_TO_OFFBASE(curr - matchIndex), dictMatchIndex, matchIndex); -+ bestLength = matchLength, *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex); - } - if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ - break; /* drop, to guarantee consistency (miss a little bit of compression) */ -@@ -218,7 +230,7 @@ ZSTD_DUBT_findBetterDictMatch ( - } - - if (bestLength >= MINMATCH) { -- U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex; -+ U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offsetPtr); (void)mIndex; - DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", - curr, (U32)bestLength, (U32)*offsetPtr, mIndex); - } -@@ -227,10 +239,11 @@ ZSTD_DUBT_findBetterDictMatch ( - } - - --static size_t --ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, -+static 
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t ZSTD_DUBT_findBestMatch(ZSTD_MatchState_t* ms, - const BYTE* const ip, const BYTE* const iend, -- size_t* offsetPtr, -+ size_t* offBasePtr, - U32 const mls, - const ZSTD_dictMode_e dictMode) - { -@@ -327,8 +340,8 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_ - if (matchLength > bestLength) { - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; -- if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) -- bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex); -+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)*offBasePtr)) ) -+ bestLength = matchLength, *offBasePtr = OFFSET_TO_OFFBASE(curr - matchIndex); - if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ - if (dictMode == ZSTD_dictMatchState) { - nbCompares = 0; /* in addition to avoiding checking any -@@ -361,16 +374,16 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_ - if (dictMode == ZSTD_dictMatchState && nbCompares) { - bestLength = ZSTD_DUBT_findBetterDictMatch( - ms, ip, iend, -- offsetPtr, bestLength, nbCompares, -+ offBasePtr, bestLength, nbCompares, - mls, dictMode); - } - - assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */ - ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ - if (bestLength >= MINMATCH) { -- U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex; -+ U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offBasePtr); (void)mIndex; - DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", -- curr, (U32)bestLength, (U32)*offsetPtr, mIndex); -+ curr, (U32)bestLength, (U32)*offBasePtr, mIndex); - } - return bestLength; - } -@@ -378,24 +391,25 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_ - - - /* ZSTD_BtFindBestMatch() : Tree updater, providing best match */ --FORCE_INLINE_TEMPLATE size_t --ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t ZSTD_BtFindBestMatch( ZSTD_MatchState_t* ms, - const BYTE* const ip, const BYTE* const iLimit, -- size_t* offsetPtr, -+ size_t* offBasePtr, - const U32 mls /* template */, - const ZSTD_dictMode_e dictMode) - { - DEBUGLOG(7, "ZSTD_BtFindBestMatch"); - if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateDUBT(ms, ip, iLimit, mls); -- return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode); -+ return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offBasePtr, mls, dictMode); - } - - /* ********************************* - * Dedicated dict search - ***********************************/ - --void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) -+void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_MatchState_t* ms, const BYTE* const ip) - { - const BYTE* const base = ms->window.base; - U32 const target = (U32)(ip - base); -@@ -514,7 +528,7 @@ void ZSTD_dedicatedDictSearch_lazy_loadD - */ - FORCE_INLINE_TEMPLATE - size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts, -- const ZSTD_matchState_t* const dms, -+ const ZSTD_MatchState_t* const dms, - const BYTE* const ip, const BYTE* const iLimit, - const BYTE* const prefixStart, const U32 curr, - const U32 dictLimit, const size_t ddsIdx) { -@@ -561,7 +575,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_sea - /* save best solution */ - if 
(currentMl > ml) { - ml = currentMl; -- *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta)); -+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta)); - if (ip+currentMl == iLimit) { - /* best possible, avoids read overflow on next attempt */ - return ml; -@@ -598,7 +612,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_sea - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; -- *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta)); -+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta)); - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } - } -@@ -614,10 +628,12 @@ size_t ZSTD_dedicatedDictSearch_lazy_sea - - /* Update chains up to ip (excluded) - Assumption : always within prefix (i.e. not within extDict) */ --FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( -- ZSTD_matchState_t* ms, -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+U32 ZSTD_insertAndFindFirstIndex_internal( -+ ZSTD_MatchState_t* ms, - const ZSTD_compressionParameters* const cParams, -- const BYTE* ip, U32 const mls) -+ const BYTE* ip, U32 const mls, U32 const lazySkipping) - { - U32* const hashTable = ms->hashTable; - const U32 hashLog = cParams->hashLog; -@@ -632,21 +648,25 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAnd - NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; - hashTable[h] = idx; - idx++; -+ /* Stop inserting every position when in the lazy skipping mode. */ -+ if (lazySkipping) -+ break; - } - - ms->nextToUpdate = target; - return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; - } - --U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { -+U32 ZSTD_insertAndFindFirstIndex(ZSTD_MatchState_t* ms, const BYTE* ip) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; -- return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); -+ return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0); - } - - /* inlining is important to hardwire a hot branch (template emulation) */ - FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_HcFindBestMatch( -- ZSTD_matchState_t* ms, -+ ZSTD_MatchState_t* ms, - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 mls, const ZSTD_dictMode_e dictMode) -@@ -670,7 +690,7 @@ size_t ZSTD_HcFindBestMatch( - U32 nbAttempts = 1U << cParams->searchLog; - size_t ml=4-1; - -- const ZSTD_matchState_t* const dms = ms->dictMatchState; -+ const ZSTD_MatchState_t* const dms = ms->dictMatchState; - const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch - ? 
dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0; - const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch -@@ -684,14 +704,15 @@ size_t ZSTD_HcFindBestMatch( - } - - /* HC4 match finder */ -- matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); -+ matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping); - - for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) { - size_t currentMl=0; - if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { - const BYTE* const match = base + matchIndex; - assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ -- if (match[ml] == ip[ml]) /* potentially better */ -+ /* read 4B starting from (match + ml + 1 - sizeof(U32)) */ -+ if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */ - currentMl = ZSTD_count(ip, match, iLimit); - } else { - const BYTE* const match = dictBase + matchIndex; -@@ -703,7 +724,7 @@ size_t ZSTD_HcFindBestMatch( - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; -- *offsetPtr = STORE_OFFSET(curr - matchIndex); -+ *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex); - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } - -@@ -739,7 +760,7 @@ size_t ZSTD_HcFindBestMatch( - if (currentMl > ml) { - ml = currentMl; - assert(curr > matchIndex + dmsIndexDelta); -- *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta)); -+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta)); - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } - -@@ -756,8 +777,6 @@ size_t ZSTD_HcFindBestMatch( - * (SIMD) Row-based matchfinder - ***********************************/ - /* Constants for row-based hash */ --#define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */ --#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */ - #define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1) - #define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */ - -@@ -769,64 +788,19 @@ typedef U64 ZSTD_VecMask; /* Clarifies - * Starting from the LSB, returns the idx of the next non-zero bit. - * Basically counting the nb of trailing zeroes. - */ --static U32 ZSTD_VecMask_next(ZSTD_VecMask val) { -- assert(val != 0); --# if (defined(__GNUC__) && ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)))) -- if (sizeof(size_t) == 4) { -- U32 mostSignificantWord = (U32)(val >> 32); -- U32 leastSignificantWord = (U32)val; -- if (leastSignificantWord == 0) { -- return 32 + (U32)__builtin_ctz(mostSignificantWord); -- } else { -- return (U32)__builtin_ctz(leastSignificantWord); -- } -- } else { -- return (U32)__builtin_ctzll(val); -- } --# else -- /* Software ctz version: http://aggregate.org/MAGIC/#Trailing%20Zero%20Count -- * and: https://stackoverflow.com/questions/2709430/count-number-of-bits-in-a-64-bit-long-big-integer -- */ -- val = ~val & (val - 1ULL); /* Lowest set bit mask */ -- val = val - ((val >> 1) & 0x5555555555555555); -- val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL); -- return (U32)((((val + (val >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56); --# endif --} -- --/* ZSTD_rotateRight_*(): -- * Rotates a bitfield to the right by "count" bits. 
-- * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts -- */ --FORCE_INLINE_TEMPLATE --U64 ZSTD_rotateRight_U64(U64 const value, U32 count) { -- assert(count < 64); -- count &= 0x3F; /* for fickle pattern recognition */ -- return (value >> count) | (U64)(value << ((0U - count) & 0x3F)); --} -- --FORCE_INLINE_TEMPLATE --U32 ZSTD_rotateRight_U32(U32 const value, U32 count) { -- assert(count < 32); -- count &= 0x1F; /* for fickle pattern recognition */ -- return (value >> count) | (U32)(value << ((0U - count) & 0x1F)); --} -- --FORCE_INLINE_TEMPLATE --U16 ZSTD_rotateRight_U16(U16 const value, U32 count) { -- assert(count < 16); -- count &= 0x0F; /* for fickle pattern recognition */ -- return (value >> count) | (U16)(value << ((0U - count) & 0x0F)); -+MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) { -+ return ZSTD_countTrailingZeros64(val); - } - - /* ZSTD_row_nextIndex(): - * Returns the next index to insert at within a tagTable row, and updates the "head" -- * value to reflect the update. Essentially cycles backwards from [0, {entries per row}) -+ * value to reflect the update. Essentially cycles backwards from [1, {entries per row}) - */ - FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) { -- U32 const next = (*tagRow - 1) & rowMask; -- *tagRow = (BYTE)next; -- return next; -+ U32 next = (*tagRow-1) & rowMask; -+ next += (next == 0) ? rowMask : 0; /* skip first position */ -+ *tagRow = (BYTE)next; -+ return next; - } - - /* ZSTD_isAligned(): -@@ -840,7 +814,7 @@ MEM_STATIC int ZSTD_isAligned(void const - /* ZSTD_row_prefetch(): - * Performs prefetching for the hashTable and tagTable at a given row. - */ --FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) { -+FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) { - PREFETCH_L1(hashTable + relRow); - if (rowLog >= 5) { - PREFETCH_L1(hashTable + relRow + 16); -@@ -859,18 +833,20 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_pref - * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries, - * but not beyond iLimit. - */ --FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base, -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_row_fillHashCache(ZSTD_MatchState_t* ms, const BYTE* base, - U32 const rowLog, U32 const mls, - U32 idx, const BYTE* const iLimit) - { - U32 const* const hashTable = ms->hashTable; -- U16 const* const tagTable = ms->tagTable; -+ BYTE const* const tagTable = ms->tagTable; - U32 const hashLog = ms->rowHashLog; - U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1); - U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch); - - for (; idx < lim; ++idx) { -- U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); -+ U32 const hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt); - U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; - ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); - ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash; -@@ -885,12 +861,15 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fill - * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at - * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. 
Also prefetches the appropriate rows from hashTable and tagTable. - */ --FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable, -- U16 const* tagTable, BYTE const* base, -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable, -+ BYTE const* tagTable, BYTE const* base, - U32 idx, U32 const hashLog, -- U32 const rowLog, U32 const mls) -+ U32 const rowLog, U32 const mls, -+ U64 const hashSalt) - { -- U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); -+ U32 const newHash = (U32)ZSTD_hashPtrSalted(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt); - U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; - ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); - { U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK]; -@@ -902,28 +881,29 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextC - /* ZSTD_row_update_internalImpl(): - * Updates the hash table with positions starting from updateStartIdx until updateEndIdx. - */ --FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms, -- U32 updateStartIdx, U32 const updateEndIdx, -- U32 const mls, U32 const rowLog, -- U32 const rowMask, U32 const useCache) -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_row_update_internalImpl(ZSTD_MatchState_t* ms, -+ U32 updateStartIdx, U32 const updateEndIdx, -+ U32 const mls, U32 const rowLog, -+ U32 const rowMask, U32 const useCache) - { - U32* const hashTable = ms->hashTable; -- U16* const tagTable = ms->tagTable; -+ BYTE* const tagTable = ms->tagTable; - U32 const hashLog = ms->rowHashLog; - const BYTE* const base = ms->window.base; - - DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx); - for (; updateStartIdx < updateEndIdx; ++updateStartIdx) { -- U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls) -- : (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); -+ U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls, ms->hashSalt) -+ : (U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt); - U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; - U32* const row = hashTable + relRow; -- BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte. -- Explicit cast allows us to get exact desired position within each row */ -+ BYTE* tagRow = tagTable + relRow; - U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); - -- assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls)); -- ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK; -+ assert(hash == ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt)); -+ tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK; - row[pos] = updateStartIdx; - } - } -@@ -932,9 +912,11 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_upda - * Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate. - * Skips sections of long matches as is necessary. 
- */ --FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip, -- U32 const mls, U32 const rowLog, -- U32 const rowMask, U32 const useCache) -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_row_update_internal(ZSTD_MatchState_t* ms, const BYTE* ip, -+ U32 const mls, U32 const rowLog, -+ U32 const rowMask, U32 const useCache) - { - U32 idx = ms->nextToUpdate; - const BYTE* const base = ms->window.base; -@@ -965,13 +947,41 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_upda - * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary - * processing. - */ --void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) { -+void ZSTD_row_update(ZSTD_MatchState_t* const ms, const BYTE* ip) { - const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); - const U32 rowMask = (1u << rowLog) - 1; - const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */); - - DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog); -- ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */); -+ ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* don't use cache */); -+} -+ -+/* Returns the mask width of bits group of which will be set to 1. Given not all -+ * architectures have easy movemask instruction, this helps to iterate over -+ * groups of bits easier and faster. -+ */ -+FORCE_INLINE_TEMPLATE U32 -+ZSTD_row_matchMaskGroupWidth(const U32 rowEntries) -+{ -+ assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64); -+ assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES); -+ (void)rowEntries; -+#if defined(ZSTD_ARCH_ARM_NEON) -+ /* NEON path only works for little endian */ -+ if (!MEM_isLittleEndian()) { -+ return 1; -+ } -+ if (rowEntries == 16) { -+ return 4; -+ } -+ if (rowEntries == 32) { -+ return 2; -+ } -+ if (rowEntries == 64) { -+ return 1; -+ } -+#endif -+ return 1; - } - - #if defined(ZSTD_ARCH_X86_SSE2) -@@ -994,71 +1004,82 @@ ZSTD_row_getSSEMask(int nbChunks, const - } - #endif - --/* Returns a ZSTD_VecMask (U32) that has the nth bit set to 1 if the newly-computed "tag" matches -- * the hash at the nth position in a row of the tagTable. -- * Each row is a circular buffer beginning at the value of "head". So we must rotate the "matches" bitfield -- * to match up with the actual layout of the entries within the hashTable */ -+#if defined(ZSTD_ARCH_ARM_NEON) -+FORCE_INLINE_TEMPLATE ZSTD_VecMask -+ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag, const U32 headGrouped) -+{ -+ assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64); -+ if (rowEntries == 16) { -+ /* vshrn_n_u16 shifts by 4 every u16 and narrows to 8 lower bits. -+ * After that groups of 4 bits represent the equalMask. We lower -+ * all bits except the highest in these groups by doing AND with -+ * 0x88 = 0b10001000. -+ */ -+ const uint8x16_t chunk = vld1q_u8(src); -+ const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag))); -+ const uint8x8_t res = vshrn_n_u16(equalMask, 4); -+ const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0); -+ return ZSTD_rotateRight_U64(matches, headGrouped) & 0x8888888888888888ull; -+ } else if (rowEntries == 32) { -+ /* Same idea as with rowEntries == 16 but doing AND with -+ * 0x55 = 0b01010101. 
-+ */ -+ const uint16x8x2_t chunk = vld2q_u16((const uint16_t*)(const void*)src); -+ const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]); -+ const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]); -+ const uint8x16_t dup = vdupq_n_u8(tag); -+ const uint8x8_t t0 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk0, dup)), 6); -+ const uint8x8_t t1 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk1, dup)), 6); -+ const uint8x8_t res = vsli_n_u8(t0, t1, 4); -+ const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0) ; -+ return ZSTD_rotateRight_U64(matches, headGrouped) & 0x5555555555555555ull; -+ } else { /* rowEntries == 64 */ -+ const uint8x16x4_t chunk = vld4q_u8(src); -+ const uint8x16_t dup = vdupq_n_u8(tag); -+ const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup); -+ const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup); -+ const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup); -+ const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup); -+ -+ const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1); -+ const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1); -+ const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2); -+ const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4); -+ const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4); -+ const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0); -+ return ZSTD_rotateRight_U64(matches, headGrouped); -+ } -+} -+#endif -+ -+/* Returns a ZSTD_VecMask (U64) that has the nth group (determined by -+ * ZSTD_row_matchMaskGroupWidth) of bits set to 1 if the newly-computed "tag" -+ * matches the hash at the nth position in a row of the tagTable. -+ * Each row is a circular buffer beginning at the value of "headGrouped". So we -+ * must rotate the "matches" bitfield to match up with the actual layout of the -+ * entries within the hashTable */ - FORCE_INLINE_TEMPLATE ZSTD_VecMask --ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, const U32 rowEntries) -+ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries) - { -- const BYTE* const src = tagRow + ZSTD_ROW_HASH_TAG_OFFSET; -+ const BYTE* const src = tagRow; - assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64); - assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES); -+ assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8); - - #if defined(ZSTD_ARCH_X86_SSE2) - -- return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, head); -+ return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, headGrouped); - - #else /* SW or NEON-LE */ - - # if defined(ZSTD_ARCH_ARM_NEON) - /* This NEON path only works for little endian - otherwise use SWAR below */ - if (MEM_isLittleEndian()) { -- if (rowEntries == 16) { -- const uint8x16_t chunk = vld1q_u8(src); -- const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag))); -- const uint16x8_t t0 = vshlq_n_u16(equalMask, 7); -- const uint32x4_t t1 = vreinterpretq_u32_u16(vsriq_n_u16(t0, t0, 14)); -- const uint64x2_t t2 = vreinterpretq_u64_u32(vshrq_n_u32(t1, 14)); -- const uint8x16_t t3 = vreinterpretq_u8_u64(vsraq_n_u64(t2, t2, 28)); -- const U16 hi = (U16)vgetq_lane_u8(t3, 8); -- const U16 lo = (U16)vgetq_lane_u8(t3, 0); -- return ZSTD_rotateRight_U16((hi << 8) | lo, head); -- } else if (rowEntries == 32) { -- const uint16x8x2_t chunk = vld2q_u16((const U16*)(const void*)src); -- const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]); -- const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]); -- const uint8x16_t equalMask0 = 
vceqq_u8(chunk0, vdupq_n_u8(tag)); -- const uint8x16_t equalMask1 = vceqq_u8(chunk1, vdupq_n_u8(tag)); -- const int8x8_t pack0 = vqmovn_s16(vreinterpretq_s16_u8(equalMask0)); -- const int8x8_t pack1 = vqmovn_s16(vreinterpretq_s16_u8(equalMask1)); -- const uint8x8_t t0 = vreinterpret_u8_s8(pack0); -- const uint8x8_t t1 = vreinterpret_u8_s8(pack1); -- const uint8x8_t t2 = vsri_n_u8(t1, t0, 2); -- const uint8x8x2_t t3 = vuzp_u8(t2, t0); -- const uint8x8_t t4 = vsri_n_u8(t3.val[1], t3.val[0], 4); -- const U32 matches = vget_lane_u32(vreinterpret_u32_u8(t4), 0); -- return ZSTD_rotateRight_U32(matches, head); -- } else { /* rowEntries == 64 */ -- const uint8x16x4_t chunk = vld4q_u8(src); -- const uint8x16_t dup = vdupq_n_u8(tag); -- const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup); -- const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup); -- const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup); -- const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup); -- -- const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1); -- const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1); -- const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2); -- const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4); -- const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4); -- const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0); -- return ZSTD_rotateRight_U64(matches, head); -- } -+ return ZSTD_row_getNEONMask(rowEntries, src, tag, headGrouped); - } - # endif /* ZSTD_ARCH_ARM_NEON */ - /* SWAR */ -- { const size_t chunkSize = sizeof(size_t); -+ { const int chunkSize = sizeof(size_t); - const size_t shiftAmount = ((chunkSize * 8) - chunkSize); - const size_t xFF = ~((size_t)0); - const size_t x01 = xFF / 0xFF; -@@ -1091,11 +1112,11 @@ ZSTD_row_getMatchMask(const BYTE* const - } - matches = ~matches; - if (rowEntries == 16) { -- return ZSTD_rotateRight_U16((U16)matches, head); -+ return ZSTD_rotateRight_U16((U16)matches, headGrouped); - } else if (rowEntries == 32) { -- return ZSTD_rotateRight_U32((U32)matches, head); -+ return ZSTD_rotateRight_U32((U32)matches, headGrouped); - } else { -- return ZSTD_rotateRight_U64((U64)matches, head); -+ return ZSTD_rotateRight_U64((U64)matches, headGrouped); - } - } - #endif -@@ -1103,29 +1124,30 @@ ZSTD_row_getMatchMask(const BYTE* const - - /* The high-level approach of the SIMD row based match finder is as follows: - * - Figure out where to insert the new entry: -- * - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag" -- * - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines -+ * - Generate a hash for current input position and split it into a one byte of tag and `rowHashLog` bits of index. -+ * - The hash is salted by a value that changes on every context reset, so when the same table is used -+ * we will avoid collisions that would otherwise slow us down by introducing phantom matches. -+ * - The hashTable is effectively split into groups or "rows" of 15 or 31 entries of U32, and the index determines - * which row to insert into. -- * - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can -- * be considered as a circular buffer with a "head" index that resides in the tagTable. -- * - Also insert the "tag" into the equivalent row and position in the tagTable. -- * - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry. 
-- * The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively, -- * for alignment/performance reasons, leaving some bytes unused. -- * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and -+ * - Determine the correct position within the row to insert the entry into. Each row of 15 or 31 can -+ * be considered as a circular buffer with a "head" index that resides in the tagTable (overall 16 or 32 bytes -+ * per row). -+ * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte tag calculated for the position and - * generate a bitfield that we can cycle through to check the collisions in the hash table. - * - Pick the longest match. -+ * - Insert the tag into the equivalent row and position in the tagTable. - */ - FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_RowFindBestMatch( -- ZSTD_matchState_t* ms, -+ ZSTD_MatchState_t* ms, - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 mls, const ZSTD_dictMode_e dictMode, - const U32 rowLog) - { - U32* const hashTable = ms->hashTable; -- U16* const tagTable = ms->tagTable; -+ BYTE* const tagTable = ms->tagTable; - U32* const hashCache = ms->hashCache; - const U32 hashLog = ms->rowHashLog; - const ZSTD_compressionParameters* const cParams = &ms->cParams; -@@ -1143,11 +1165,14 @@ size_t ZSTD_RowFindBestMatch( - const U32 rowEntries = (1U << rowLog); - const U32 rowMask = rowEntries - 1; - const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */ -+ const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries); -+ const U64 hashSalt = ms->hashSalt; - U32 nbAttempts = 1U << cappedSearchLog; - size_t ml=4-1; -+ U32 hash; - - /* DMS/DDS variables that may be referenced laster */ -- const ZSTD_matchState_t* const dms = ms->dictMatchState; -+ const ZSTD_MatchState_t* const dms = ms->dictMatchState; - - /* Initialize the following variables to satisfy static analyzer */ - size_t ddsIdx = 0; -@@ -1168,7 +1193,7 @@ size_t ZSTD_RowFindBestMatch( - if (dictMode == ZSTD_dictMatchState) { - /* Prefetch DMS rows */ - U32* const dmsHashTable = dms->hashTable; -- U16* const dmsTagTable = dms->tagTable; -+ BYTE* const dmsTagTable = dms->tagTable; - U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls); - U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; - dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK; -@@ -1178,23 +1203,34 @@ size_t ZSTD_RowFindBestMatch( - } - - /* Update the hashTable and tagTable up to (but not including) ip */ -- ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */); -+ if (!ms->lazySkipping) { -+ ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */); -+ hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt); -+ } else { -+ /* Stop inserting every position when in the lazy skipping mode. -+ * The hash cache is also not kept up to date in this mode. 
-+ */ -+ hash = (U32)ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt); -+ ms->nextToUpdate = curr; -+ } -+ ms->hashSaltEntropy += hash; /* collect salt entropy */ -+ - { /* Get the hash for ip, compute the appropriate row */ -- U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls); - U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; - U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK; - U32* const row = hashTable + relRow; - BYTE* tagRow = (BYTE*)(tagTable + relRow); -- U32 const head = *tagRow & rowMask; -+ U32 const headGrouped = (*tagRow & rowMask) * groupWidth; - U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES]; - size_t numMatches = 0; - size_t currMatch = 0; -- ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries); -+ ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries); - - /* Cycle through the matches and prefetch */ -- for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { -- U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; -+ for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) { -+ U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask; - U32 const matchIndex = row[matchPos]; -+ if(matchPos == 0) continue; - assert(numMatches < rowEntries); - if (matchIndex < lowLimit) - break; -@@ -1204,13 +1240,14 @@ size_t ZSTD_RowFindBestMatch( - PREFETCH_L1(dictBase + matchIndex); - } - matchBuffer[numMatches++] = matchIndex; -+ --nbAttempts; - } - - /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop - in ZSTD_row_update_internal() at the next search. */ - { - U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); -- tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag; -+ tagRow[pos] = (BYTE)tag; - row[pos] = ms->nextToUpdate++; - } - -@@ -1224,7 +1261,8 @@ size_t ZSTD_RowFindBestMatch( - if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { - const BYTE* const match = base + matchIndex; - assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ -- if (match[ml] == ip[ml]) /* potentially better */ -+ /* read 4B starting from (match + ml + 1 - sizeof(U32)) */ -+ if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */ - currentMl = ZSTD_count(ip, match, iLimit); - } else { - const BYTE* const match = dictBase + matchIndex; -@@ -1236,7 +1274,7 @@ size_t ZSTD_RowFindBestMatch( - /* Save best solution */ - if (currentMl > ml) { - ml = currentMl; -- *offsetPtr = STORE_OFFSET(curr - matchIndex); -+ *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex); - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } - } -@@ -1254,19 +1292,21 @@ size_t ZSTD_RowFindBestMatch( - const U32 dmsSize = (U32)(dmsEnd - dmsBase); - const U32 dmsIndexDelta = dictLimit - dmsSize; - -- { U32 const head = *dmsTagRow & rowMask; -+ { U32 const headGrouped = (*dmsTagRow & rowMask) * groupWidth; - U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES]; - size_t numMatches = 0; - size_t currMatch = 0; -- ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries); -+ ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, headGrouped, rowEntries); - -- for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { -- U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; -+ 
for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) { -+ U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask; - U32 const matchIndex = dmsRow[matchPos]; -+ if(matchPos == 0) continue; - if (matchIndex < dmsLowestIndex) - break; - PREFETCH_L1(dmsBase + matchIndex); - matchBuffer[numMatches++] = matchIndex; -+ --nbAttempts; - } - - /* Return the longest match */ -@@ -1285,7 +1325,7 @@ size_t ZSTD_RowFindBestMatch( - if (currentMl > ml) { - ml = currentMl; - assert(curr > matchIndex + dmsIndexDelta); -- *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta)); -+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta)); - if (ip+currentMl == iLimit) break; - } - } -@@ -1301,7 +1341,7 @@ size_t ZSTD_RowFindBestMatch( - * ZSTD_searchMax() dispatches to the correct implementation function. - * - * TODO: The start of the search function involves loading and calculating a -- * bunch of constants from the ZSTD_matchState_t. These computations could be -+ * bunch of constants from the ZSTD_MatchState_t. These computations could be - * done in an initialization function, and saved somewhere in the match state. - * Then we could pass a pointer to the saved state instead of the match state, - * and avoid duplicate computations. -@@ -1325,7 +1365,7 @@ size_t ZSTD_RowFindBestMatch( - - #define GEN_ZSTD_BT_SEARCH_FN(dictMode, mls) \ - ZSTD_SEARCH_FN_ATTRS size_t ZSTD_BT_SEARCH_FN(dictMode, mls)( \ -- ZSTD_matchState_t* ms, \ -+ ZSTD_MatchState_t* ms, \ - const BYTE* ip, const BYTE* const iLimit, \ - size_t* offBasePtr) \ - { \ -@@ -1335,7 +1375,7 @@ size_t ZSTD_RowFindBestMatch( - - #define GEN_ZSTD_HC_SEARCH_FN(dictMode, mls) \ - ZSTD_SEARCH_FN_ATTRS size_t ZSTD_HC_SEARCH_FN(dictMode, mls)( \ -- ZSTD_matchState_t* ms, \ -+ ZSTD_MatchState_t* ms, \ - const BYTE* ip, const BYTE* const iLimit, \ - size_t* offsetPtr) \ - { \ -@@ -1345,7 +1385,7 @@ size_t ZSTD_RowFindBestMatch( - - #define GEN_ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) \ - ZSTD_SEARCH_FN_ATTRS size_t ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)( \ -- ZSTD_matchState_t* ms, \ -+ ZSTD_MatchState_t* ms, \ - const BYTE* ip, const BYTE* const iLimit, \ - size_t* offsetPtr) \ - { \ -@@ -1446,7 +1486,7 @@ typedef enum { search_hashChain=0, searc - * If a match is found its offset is stored in @p offsetPtr. 
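For context on the headGrouped/groupWidth arithmetic above: some vectorized match-mask implementations emit groupWidth mask bits per row entry, so the ring-buffer head is pre-scaled by groupWidth and every extracted bit position is divided by groupWidth before being wrapped with rowMask (the patched loops also skip matchPos 0). A standalone sketch of that index mapping, with invented toy values, illustrative only:

/* Illustrative only: converting a bit position in a "grouped" match mask
 * back to a row entry index, mirroring
 *   matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask */
#include <stdint.h>
#include <stdio.h>

static unsigned lowest_set_bit(uint64_t mask)   /* analogue of ZSTD_VecMask_next */
{
    unsigned n = 0;
    while (!(mask & 1)) { mask >>= 1; n++; }
    return n;
}

int main(void)
{
    const unsigned rowEntries = 16;
    const unsigned rowMask    = rowEntries - 1;
    const unsigned groupWidth = 2;              /* e.g. 2 mask bits per entry */
    const unsigned head       = 5;              /* head read from the tag row */

    unsigned headGrouped = (head & rowMask) * groupWidth;   /* = 10 */
    uint64_t matches     = 1ULL << 7;                       /* pretend bit 7 is set */

    unsigned bit      = lowest_set_bit(matches);                      /* = 7 */
    unsigned matchPos = ((headGrouped + bit) / groupWidth) & rowMask; /* = 8 */

    printf("mask bit %u -> row entry %u\n", bit, matchPos);
    return 0;
}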
- */ - FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax( -- ZSTD_matchState_t* ms, -+ ZSTD_MatchState_t* ms, - const BYTE* ip, - const BYTE* iend, - size_t* offsetPtr, -@@ -1472,9 +1512,10 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_search - * Common parser - lazy strategy - *********************************/ - --FORCE_INLINE_TEMPLATE size_t --ZSTD_compressBlock_lazy_generic( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t ZSTD_compressBlock_lazy_generic( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, - const searchMethod_e searchMethod, const U32 depth, -@@ -1491,12 +1532,13 @@ ZSTD_compressBlock_lazy_generic( - const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6); - const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); - -- U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; -+ U32 offset_1 = rep[0], offset_2 = rep[1]; -+ U32 offsetSaved1 = 0, offsetSaved2 = 0; - - const int isDMS = dictMode == ZSTD_dictMatchState; - const int isDDS = dictMode == ZSTD_dedicatedDictSearch; - const int isDxS = isDMS || isDDS; -- const ZSTD_matchState_t* const dms = ms->dictMatchState; -+ const ZSTD_MatchState_t* const dms = ms->dictMatchState; - const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0; - const BYTE* const dictBase = isDxS ? dms->window.base : NULL; - const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL; -@@ -1512,8 +1554,8 @@ ZSTD_compressBlock_lazy_generic( - U32 const curr = (U32)(ip - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog); - U32 const maxRep = curr - windowLow; -- if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; -- if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; -+ if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0; -+ if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0; - } - if (isDxS) { - /* dictMatchState repCode checks don't currently handle repCode == 0 -@@ -1522,10 +1564,11 @@ ZSTD_compressBlock_lazy_generic( - assert(offset_2 <= dictAndPrefixLength); - } - -+ /* Reset the lazy skipping state */ -+ ms->lazySkipping = 0; -+ - if (searchMethod == search_rowHash) { -- ZSTD_row_fillHashCache(ms, base, rowLog, -- MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), -- ms->nextToUpdate, ilimit); -+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit); - } - - /* Match Loop */ -@@ -1537,7 +1580,7 @@ ZSTD_compressBlock_lazy_generic( - #endif - while (ip < ilimit) { - size_t matchLength=0; -- size_t offcode=STORE_REPCODE_1; -+ size_t offBase = REPCODE1_TO_OFFBASE; - const BYTE* start=ip+1; - DEBUGLOG(7, "search baseline (depth 0)"); - -@@ -1548,7 +1591,7 @@ ZSTD_compressBlock_lazy_generic( - && repIndex < prefixLowestIndex) ? - dictBase + (repIndex - dictIndexDelta) : - base + repIndex; -- if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) -+ if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex)) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; -@@ -1562,14 +1605,23 @@ ZSTD_compressBlock_lazy_generic( - } - - /* first search (depth 0) */ -- { size_t offsetFound = 999999999; -- size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offsetFound, mls, rowLog, searchMethod, dictMode); -+ { size_t offbaseFound = 999999999; -+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offbaseFound, mls, rowLog, searchMethod, dictMode); - if (ml2 > matchLength) -- matchLength = ml2, start = ip, offcode=offsetFound; -+ matchLength = ml2, start = ip, offBase = offbaseFound; - } - - if (matchLength < 4) { -- ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ -+ size_t const step = ((size_t)(ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */; -+ ip += step; -+ /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time. -+ * In this mode we stop inserting every position into our tables, and only insert -+ * positions that we search, which is one in step positions. -+ * The exact cutoff is flexible, I've just chosen a number that is reasonably high, -+ * so we minimize the compression ratio loss in "normal" scenarios. This mode gets -+ * triggered once we've gone 2KB without finding any matches. -+ */ -+ ms->lazySkipping = step > kLazySkippingStep; - continue; - } - -@@ -1579,34 +1631,34 @@ ZSTD_compressBlock_lazy_generic( - DEBUGLOG(7, "search depth 1"); - ip ++; - if ( (dictMode == ZSTD_noDict) -- && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { -+ && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { - size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; - int const gain2 = (int)(mlRep * 3); -- int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); -+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) -- matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; -+ matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip; - } - if (isDxS) { - const U32 repIndex = (U32)(ip - base) - offset_1; - const BYTE* repMatch = repIndex < prefixLowestIndex ? - dictBase + (repIndex - dictIndexDelta) : - base + repIndex; -- if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) -+ if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex)) - && (MEM_read32(repMatch) == MEM_read32(ip)) ) { - const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
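The lazy-skipping trigger above is pure arithmetic on the distance from the last emitted sequence. Assuming kSearchStrength and kLazySkippingStep are both 8, which matches the "more than 8 bytes at a time" / "2KB without finding any matches" wording of the comment (the real constants are defined elsewhere in the zstd sources), the step crosses the threshold exactly at a 2 KB gap. A short standalone tabulation:

/* Illustrative only: how the skip step grows with (ip - anchor) and where
 * lazySkipping turns on.  Constant values are assumptions, see above. */
#include <stdio.h>
#include <stddef.h>

#define K_SEARCH_STRENGTH    8   /* assumed */
#define K_LAZY_SKIPPING_STEP 8   /* assumed */

int main(void)
{
    size_t distances[] = { 0, 256, 1024, 2047, 2048, 8192 };
    for (size_t i = 0; i < sizeof distances / sizeof distances[0]; i++) {
        size_t d    = distances[i];                   /* ip - anchor */
        size_t step = (d >> K_SEARCH_STRENGTH) + 1;   /* bytes skipped this round */
        int lazy    = step > K_LAZY_SKIPPING_STEP;    /* stop inserting every position */
        printf("ip-anchor=%5zu  step=%3zu  lazySkipping=%d\n", d, step, lazy);
    }
    return 0;
}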
dictEnd : iend; - size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; - int const gain2 = (int)(mlRep * 3); -- int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); -+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) -- matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; -+ matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip; - } - } -- { size_t offset2=999999999; -- size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, dictMode); -- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ -- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4); -+ { size_t ofbCandidate=999999999; -+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode); -+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ -+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4); - if ((ml2 >= 4) && (gain2 > gain1)) { -- matchLength = ml2, offcode = offset2, start = ip; -+ matchLength = ml2, offBase = ofbCandidate, start = ip; - continue; /* search a better one */ - } } - -@@ -1615,34 +1667,34 @@ ZSTD_compressBlock_lazy_generic( - DEBUGLOG(7, "search depth 2"); - ip ++; - if ( (dictMode == ZSTD_noDict) -- && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { -+ && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { - size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; - int const gain2 = (int)(mlRep * 4); -- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); -+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) -- matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; -+ matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip; - } - if (isDxS) { - const U32 repIndex = (U32)(ip - base) - offset_1; - const BYTE* repMatch = repIndex < prefixLowestIndex ? - dictBase + (repIndex - dictIndexDelta) : - base + repIndex; -- if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) -+ if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex)) - && (MEM_read32(repMatch) == MEM_read32(ip)) ) { - const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
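ZSTD_index_overlap_check appears to wrap the open-coded "intentional underflow" test it replaces in these hunks. The predicate is true unless repIndex lands in the last 3 positions below prefixLowestIndex, i.e. unless a 4-byte read at the repcode candidate would straddle the extDict/prefix boundary; unsigned wraparound also makes it true when repIndex is at or above the boundary. A small standalone demonstration (not from the zstd sources):

/* Illustrative only: the unsigned-underflow idiom
 *   (U32)((prefixLowestIndex - 1) - repIndex) >= 3 */
#include <stdint.h>
#include <stdio.h>

static int rep_check_ok(unsigned prefixLowestIndex, unsigned repIndex)
{
    return (uint32_t)((prefixLowestIndex - 1) - repIndex) >= 3;
}

int main(void)
{
    unsigned prefixLowestIndex = 100;
    unsigned samples[] = { 90, 96, 97, 98, 99, 100, 150 };
    /* 97..99 are rejected: a 4-byte read there would cross index 100. */
    for (unsigned i = 0; i < sizeof samples / sizeof samples[0]; i++)
        printf("repIndex=%3u -> tested: %d\n", samples[i],
               rep_check_ok(prefixLowestIndex, samples[i]));
    return 0;
}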
dictEnd : iend; - size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; - int const gain2 = (int)(mlRep * 4); -- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); -+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) -- matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; -+ matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip; - } - } -- { size_t offset2=999999999; -- size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, dictMode); -- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ -- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7); -+ { size_t ofbCandidate=999999999; -+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode); -+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ -+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7); - if ((ml2 >= 4) && (gain2 > gain1)) { -- matchLength = ml2, offcode = offset2, start = ip; -+ matchLength = ml2, offBase = ofbCandidate, start = ip; - continue; - } } } - break; /* nothing found : store previous solution */ -@@ -1653,26 +1705,33 @@ ZSTD_compressBlock_lazy_generic( - * notably if `value` is unsigned, resulting in a large positive `-value`. - */ - /* catch up */ -- if (STORED_IS_OFFSET(offcode)) { -+ if (OFFBASE_IS_OFFSET(offBase)) { - if (dictMode == ZSTD_noDict) { -- while ( ((start > anchor) & (start - STORED_OFFSET(offcode) > prefixLowest)) -- && (start[-1] == (start-STORED_OFFSET(offcode))[-1]) ) /* only search for offset within prefix */ -+ while ( ((start > anchor) & (start - OFFBASE_TO_OFFSET(offBase) > prefixLowest)) -+ && (start[-1] == (start-OFFBASE_TO_OFFSET(offBase))[-1]) ) /* only search for offset within prefix */ - { start--; matchLength++; } - } - if (isDxS) { -- U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode)); -+ U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase)); - const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; - const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest; - while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ - } -- offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode); -+ offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase); - } - /* store sequence */ - _storeSequence: - { size_t const litLength = (size_t)(start - anchor); -- ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength); -+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength); - anchor = ip = start + matchLength; - } -+ if (ms->lazySkipping) { -+ /* We've found a match, disable lazy skipping mode, and refill the hash cache. */ -+ if (searchMethod == search_rowHash) { -+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit); -+ } -+ ms->lazySkipping = 0; -+ } - - /* check immediate repcode */ - if (isDxS) { -@@ -1682,12 +1741,12 @@ _storeSequence: - const BYTE* repMatch = repIndex < prefixLowestIndex ? 
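The renamed macros above (OFFSET_TO_OFFBASE, REPCODE1_TO_OFFBASE, OFFBASE_IS_OFFSET, OFFBASE_TO_OFFSET) all operate on a single "offBase" value. Assuming the conventional zstd encoding, where repcodes occupy offBase values 1..ZSTD_REP_NUM and real offsets are stored shifted up by ZSTD_REP_NUM, the scheme looks like the following standalone sketch (the *_SK macros are stand-ins, not the real zstd_compress_internal.h definitions):

/* Illustrative only: one U32 carries either a repcode or a real offset. */
#include <stdint.h>
#include <stdio.h>
#include <assert.h>

#define REP_NUM 3u                                   /* assumed ZSTD_REP_NUM */
#define OFFSET_TO_OFFBASE_SK(o)  ((o) + REP_NUM)     /* real offset  -> offBase */
#define REPCODE1_TO_OFFBASE_SK   1u                  /* first repcode -> offBase */
#define OFFBASE_IS_OFFSET_SK(ob) ((ob) > REP_NUM)    /* real offset, not a repcode? */
#define OFFBASE_TO_OFFSET_SK(ob) ((ob) - REP_NUM)    /* recover the raw offset */

int main(void)
{
    uint32_t fromMatch = OFFSET_TO_OFFBASE_SK(1234); /* match found by the search */
    uint32_t fromRep   = REPCODE1_TO_OFFBASE_SK;     /* repeat of the last offset */

    assert(OFFBASE_IS_OFFSET_SK(fromMatch));
    assert(!OFFBASE_IS_OFFSET_SK(fromRep));
    printf("offBase %u decodes to offset %u\n",
           (unsigned)fromMatch, (unsigned)OFFBASE_TO_OFFSET_SK(fromMatch));
    printf("offBase %u is repcode 1\n", (unsigned)fromRep);
    return 0;
}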
- dictBase - dictIndexDelta + repIndex : - base + repIndex; -- if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) -+ if ( (ZSTD_index_overlap_check(prefixLowestIndex, repIndex)) - && (MEM_read32(repMatch) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; -- offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap offset_2 <=> offset_1 */ -- ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength); -+ offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap offset_2 <=> offset_1 */ -+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength); - ip += matchLength; - anchor = ip; - continue; -@@ -1701,168 +1760,183 @@ _storeSequence: - && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { - /* store sequence */ - matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; -- offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap repcodes */ -- ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength); -+ offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap repcodes */ -+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } } - -- /* Save reps for next block */ -- rep[0] = offset_1 ? offset_1 : savedOffset; -- rep[1] = offset_2 ? offset_2 : savedOffset; -+ /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0), -+ * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */ -+ offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2; -+ -+ /* save reps for next block */ -+ rep[0] = offset_1 ? offset_1 : offsetSaved1; -+ rep[1] = offset_2 ? 
offset_2 : offsetSaved2; - - /* Return the last literals size */ - return (size_t)(iend - anchor); - } -+#endif /* build exclusions */ - - --size_t ZSTD_compressBlock_btlazy2( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_greedy( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); - } - --size_t ZSTD_compressBlock_lazy2( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_greedy_dictMatchState( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); - } - --size_t ZSTD_compressBlock_lazy( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); - } - --size_t ZSTD_compressBlock_greedy( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_greedy_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict); - } - --size_t ZSTD_compressBlock_btlazy2_dictMatchState( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_greedy_dictMatchState_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState); - } - --size_t ZSTD_compressBlock_lazy2_dictMatchState( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch); - } -+#endif - --size_t ZSTD_compressBlock_lazy_dictMatchState( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_lazy( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return 
ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); - } - --size_t ZSTD_compressBlock_greedy_dictMatchState( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy_dictMatchState( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); - } - -- --size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); - } - --size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict); - } - --size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy_dictMatchState_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState); - } - --/* Row-based matchfinder */ --size_t ZSTD_compressBlock_lazy2_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch); - } -+#endif - --size_t ZSTD_compressBlock_lazy_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_lazy2( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); - } - --size_t ZSTD_compressBlock_greedy_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t 
ZSTD_compressBlock_lazy2_dictMatchState( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); - } - --size_t ZSTD_compressBlock_lazy2_dictMatchState_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); - } - --size_t ZSTD_compressBlock_lazy_dictMatchState_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy2_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict); - } - --size_t ZSTD_compressBlock_greedy_dictMatchState_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy2_dictMatchState_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState); - } - -- - size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch); - } -+#endif - --size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_btlazy2( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); - } - --size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_btlazy2_dictMatchState( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); - } -+#endif - -+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ -+ || 
!defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) - FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_compressBlock_lazy_extDict_generic( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, - const searchMethod_e searchMethod, const U32 depth) -@@ -1886,12 +1960,13 @@ size_t ZSTD_compressBlock_lazy_extDict_g - - DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod); - -+ /* Reset the lazy skipping state */ -+ ms->lazySkipping = 0; -+ - /* init */ - ip += (ip == prefixStart); - if (searchMethod == search_rowHash) { -- ZSTD_row_fillHashCache(ms, base, rowLog, -- MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), -- ms->nextToUpdate, ilimit); -+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit); - } - - /* Match Loop */ -@@ -1903,7 +1978,7 @@ size_t ZSTD_compressBlock_lazy_extDict_g - #endif - while (ip < ilimit) { - size_t matchLength=0; -- size_t offcode=STORE_REPCODE_1; -+ size_t offBase = REPCODE1_TO_OFFBASE; - const BYTE* start=ip+1; - U32 curr = (U32)(ip-base); - -@@ -1912,7 +1987,7 @@ size_t ZSTD_compressBlock_lazy_extDict_g - const U32 repIndex = (U32)(curr+1 - offset_1); - const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; -- if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ -+ if ( (ZSTD_index_overlap_check(dictLimit, repIndex)) - & (offset_1 <= curr+1 - windowLow) ) /* note: we are searching at curr+1 */ - if (MEM_read32(ip+1) == MEM_read32(repMatch)) { - /* repcode detected we should take it */ -@@ -1922,14 +1997,23 @@ size_t ZSTD_compressBlock_lazy_extDict_g - } } - - /* first search (depth 0) */ -- { size_t offsetFound = 999999999; -- size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offsetFound, mls, rowLog, searchMethod, ZSTD_extDict); -+ { size_t ofbCandidate = 999999999; -+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict); - if (ml2 > matchLength) -- matchLength = ml2, start = ip, offcode=offsetFound; -+ matchLength = ml2, start = ip, offBase = ofbCandidate; - } - - if (matchLength < 4) { -- ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ -+ size_t const step = ((size_t)(ip-anchor) >> kSearchStrength); -+ ip += step + 1; /* jump faster over incompressible sections */ -+ /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time. -+ * In this mode we stop inserting every position into our tables, and only insert -+ * positions that we search, which is one in step positions. -+ * The exact cutoff is flexible, I've just chosen a number that is reasonably high, -+ * so we minimize the compression ratio loss in "normal" scenarios. This mode gets -+ * triggered once we've gone 2KB without finding any matches. -+ */ -+ ms->lazySkipping = step > kLazySkippingStep; - continue; - } - -@@ -1939,30 +2023,30 @@ size_t ZSTD_compressBlock_lazy_extDict_g - ip ++; - curr++; - /* check repCode */ -- if (offcode) { -+ if (offBase) { - const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog); - const U32 repIndex = (U32)(curr - offset_1); - const BYTE* const repBase = repIndex < dictLimit ? 
dictBase : base; - const BYTE* const repMatch = repBase + repIndex; -- if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ -+ if ( (ZSTD_index_overlap_check(dictLimit, repIndex)) - & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; - int const gain2 = (int)(repLength * 3); -- int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); -+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1); - if ((repLength >= 4) && (gain2 > gain1)) -- matchLength = repLength, offcode = STORE_REPCODE_1, start = ip; -+ matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip; - } } - - /* search match, depth 1 */ -- { size_t offset2=999999999; -- size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, ZSTD_extDict); -- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ -- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4); -+ { size_t ofbCandidate = 999999999; -+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict); -+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ -+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4); - if ((ml2 >= 4) && (gain2 > gain1)) { -- matchLength = ml2, offcode = offset2, start = ip; -+ matchLength = ml2, offBase = ofbCandidate, start = ip; - continue; /* search a better one */ - } } - -@@ -1971,50 +2055,57 @@ size_t ZSTD_compressBlock_lazy_extDict_g - ip ++; - curr++; - /* check repCode */ -- if (offcode) { -+ if (offBase) { - const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog); - const U32 repIndex = (U32)(curr - offset_1); - const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; -- if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ -+ if ( (ZSTD_index_overlap_check(dictLimit, repIndex)) - & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; - int const gain2 = (int)(repLength * 4); -- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); -+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1); - if ((repLength >= 4) && (gain2 > gain1)) -- matchLength = repLength, offcode = STORE_REPCODE_1, start = ip; -+ matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip; - } } - - /* search match, depth 2 */ -- { size_t offset2=999999999; -- size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, ZSTD_extDict); -- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ -- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7); -+ { size_t ofbCandidate = 999999999; -+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict); -+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ -+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7); - if ((ml2 >= 4) && (gain2 > gain1)) { -- matchLength = ml2, offcode = offset2, start = ip; -+ matchLength = ml2, offBase = ofbCandidate, start = ip; - continue; - } } } - break; /* nothing found : store previous solution */ - } - - /* catch up */ -- if (STORED_IS_OFFSET(offcode)) { -- U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode)); -+ if (OFFBASE_IS_OFFSET(offBase)) { -+ U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase)); - const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; - const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; - while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ -- offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode); -+ offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase); - } - - /* store sequence */ - _storeSequence: - { size_t const litLength = (size_t)(start - anchor); -- ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength); -+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength); - anchor = ip = start + matchLength; - } -+ if (ms->lazySkipping) { -+ /* We've found a match, disable lazy skipping mode, and refill the hash cache. */ -+ if (searchMethod == search_rowHash) { -+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit); -+ } -+ ms->lazySkipping = 0; -+ } - - /* check immediate repcode */ - while (ip <= ilimit) { -@@ -2023,14 +2114,14 @@ _storeSequence: - const U32 repIndex = repCurrent - offset_2; - const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; -- if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ -+ if ( (ZSTD_index_overlap_check(dictLimit, repIndex)) - & (offset_2 <= repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; -- offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap offset history */ -- ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength); -+ offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap offset history */ -+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ -@@ -2045,58 +2136,65 @@ _storeSequence: - /* Return the last literals size */ - return (size_t)(iend - anchor); - } -+#endif /* build exclusions */ - -- -+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR - size_t ZSTD_compressBlock_greedy_extDict( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); - } - --size_t ZSTD_compressBlock_lazy_extDict( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_greedy_extDict_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -- - { -- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); -+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0); - } -+#endif - --size_t ZSTD_compressBlock_lazy2_extDict( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_lazy_extDict( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - - { -- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); -+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); - } - --size_t ZSTD_compressBlock_btlazy2_extDict( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy_extDict_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - - { -- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); -+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1); - } -+#endif - --size_t ZSTD_compressBlock_greedy_extDict_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_lazy2_extDict( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -+ - { -- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0); -+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); - } - --size_t ZSTD_compressBlock_lazy_extDict_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy2_extDict_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -- - { -- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1); -+ return 
ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2); - } -+#endif - --size_t ZSTD_compressBlock_lazy2_extDict_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_btlazy2_extDict( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - - { -- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2); -+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); - } -+#endif ---- a/lib/zstd/compress/zstd_lazy.h -+++ b/lib/zstd/compress/zstd_lazy.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -11,7 +12,6 @@ - #ifndef ZSTD_LAZY_H - #define ZSTD_LAZY_H - -- - #include "zstd_compress_internal.h" - - /* -@@ -22,98 +22,173 @@ - */ - #define ZSTD_LAZY_DDSS_BUCKET_LOG 2 - --U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); --void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip); -+#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */ -+ -+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) -+U32 ZSTD_insertAndFindFirstIndex(ZSTD_MatchState_t* ms, const BYTE* ip); -+void ZSTD_row_update(ZSTD_MatchState_t* const ms, const BYTE* ip); - --void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); -+void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_MatchState_t* ms, const BYTE* const ip); - - void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). 
preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ -+#endif - --size_t ZSTD_compressBlock_btlazy2( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_greedy( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy2( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_greedy_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_greedy_dictMatchState( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_greedy( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_greedy_dictMatchState_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy2_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_greedy_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_greedy_extDict( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ void const* src, size_t srcSize); -+size_t ZSTD_compressBlock_greedy_extDict_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - --size_t ZSTD_compressBlock_btlazy2_dictMatchState( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+#define ZSTD_COMPRESSBLOCK_GREEDY ZSTD_compressBlock_greedy -+#define ZSTD_COMPRESSBLOCK_GREEDY_ROW ZSTD_compressBlock_greedy_row -+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE ZSTD_compressBlock_greedy_dictMatchState -+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW ZSTD_compressBlock_greedy_dictMatchState_row -+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH ZSTD_compressBlock_greedy_dedicatedDictSearch -+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_greedy_dedicatedDictSearch_row -+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT ZSTD_compressBlock_greedy_extDict -+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW ZSTD_compressBlock_greedy_extDict_row -+#else -+#define ZSTD_COMPRESSBLOCK_GREEDY NULL -+#define ZSTD_COMPRESSBLOCK_GREEDY_ROW NULL -+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE NULL -+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW NULL -+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH NULL -+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW NULL -+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT NULL -+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW NULL -+#endif -+ -+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_lazy( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void 
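The ZSTD_COMPRESSBLOCK_* / NULL macro pairs above let the strategy dispatch table be written unconditionally: a family excluded at build time simply contributes NULL entries instead of function pointers. A minimal standalone sketch of the same pattern, with invented names:

/* Illustrative only: compile-time exclusion feeding a function-pointer table. */
#include <stddef.h>
#include <stdio.h>

typedef size_t (*block_compressor_f)(const void *src, size_t srcSize);

static size_t compress_greedy(const void *src, size_t srcSize)
{ (void)src; return srcSize; }            /* stand-in body */

#ifndef EXCLUDE_GREEDY
#  define COMPRESSBLOCK_GREEDY compress_greedy
#else
#  define COMPRESSBLOCK_GREEDY NULL
#endif

static const block_compressor_f table[] = {
    COMPRESSBLOCK_GREEDY,                 /* NULL when the family is excluded */
};

int main(void)
{
    if (table[0] == NULL)
        printf("greedy compressor excluded from this build\n");
    else
        printf("greedy compressor produced %zu\n", table[0]("abc", 3));
    return 0;
}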
const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy2_dictMatchState( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - size_t ZSTD_compressBlock_lazy_dictMatchState( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -- void const* src, size_t srcSize); --size_t ZSTD_compressBlock_greedy_dictMatchState( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -- void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy2_dictMatchState_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - size_t ZSTD_compressBlock_lazy_dictMatchState_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_greedy_dictMatchState_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -- --size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy_extDict( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy_extDict_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ -+#define ZSTD_COMPRESSBLOCK_LAZY ZSTD_compressBlock_lazy -+#define ZSTD_COMPRESSBLOCK_LAZY_ROW ZSTD_compressBlock_lazy_row -+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE ZSTD_compressBlock_lazy_dictMatchState -+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy_dictMatchState_row -+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy_dedicatedDictSearch -+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy_dedicatedDictSearch_row -+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT ZSTD_compressBlock_lazy_extDict -+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW ZSTD_compressBlock_lazy_extDict_row -+#else -+#define ZSTD_COMPRESSBLOCK_LAZY NULL -+#define ZSTD_COMPRESSBLOCK_LAZY_ROW NULL -+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE NULL -+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW NULL -+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH NULL -+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW NULL -+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT NULL -+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW NULL -+#endif -+ -+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_lazy2( -+ ZSTD_MatchState_t* ms, 
SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy2_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy2_dictMatchState( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -- --size_t ZSTD_compressBlock_greedy_extDict( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy2_dictMatchState_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy_extDict( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ void const* src, size_t srcSize); -+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - size_t ZSTD_compressBlock_lazy2_extDict( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_greedy_extDict_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_lazy2_extDict_row( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy_extDict_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ -+#define ZSTD_COMPRESSBLOCK_LAZY2 ZSTD_compressBlock_lazy2 -+#define ZSTD_COMPRESSBLOCK_LAZY2_ROW ZSTD_compressBlock_lazy2_row -+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE ZSTD_compressBlock_lazy2_dictMatchState -+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy2_dictMatchState_row -+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy2_dedicatedDictSearch -+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy2_dedicatedDictSearch_row -+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT ZSTD_compressBlock_lazy2_extDict -+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW ZSTD_compressBlock_lazy2_extDict_row -+#else -+#define ZSTD_COMPRESSBLOCK_LAZY2 NULL -+#define ZSTD_COMPRESSBLOCK_LAZY2_ROW NULL -+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE NULL -+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW NULL -+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH NULL -+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW NULL -+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT NULL -+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW NULL -+#endif -+ -+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_btlazy2( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy2_extDict_row( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_btlazy2_dictMatchState( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* 
src, size_t srcSize); - size_t ZSTD_compressBlock_btlazy2_extDict( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -- - -+#define ZSTD_COMPRESSBLOCK_BTLAZY2 ZSTD_compressBlock_btlazy2 -+#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE ZSTD_compressBlock_btlazy2_dictMatchState -+#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT ZSTD_compressBlock_btlazy2_extDict -+#else -+#define ZSTD_COMPRESSBLOCK_BTLAZY2 NULL -+#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE NULL -+#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT NULL -+#endif - - #endif /* ZSTD_LAZY_H */ ---- a/lib/zstd/compress/zstd_ldm.c -+++ b/lib/zstd/compress/zstd_ldm.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -16,7 +17,7 @@ - #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */ - #include "zstd_ldm_geartab.h" - --#define LDM_BUCKET_SIZE_LOG 3 -+#define LDM_BUCKET_SIZE_LOG 4 - #define LDM_MIN_MATCH_LENGTH 64 - #define LDM_HASH_RLOG 7 - -@@ -133,21 +134,35 @@ done: - } - - void ZSTD_ldm_adjustParameters(ldmParams_t* params, -- ZSTD_compressionParameters const* cParams) -+ const ZSTD_compressionParameters* cParams) - { - params->windowLog = cParams->windowLog; - ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); - DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); -- if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; -- if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; -+ if (params->hashRateLog == 0) { -+ if (params->hashLog > 0) { -+ /* if params->hashLog is set, derive hashRateLog from it */ -+ assert(params->hashLog <= ZSTD_HASHLOG_MAX); -+ if (params->windowLog > params->hashLog) { -+ params->hashRateLog = params->windowLog - params->hashLog; -+ } -+ } else { -+ assert(1 <= (int)cParams->strategy && (int)cParams->strategy <= 9); -+ /* mapping from [fast, rate7] to [btultra2, rate4] */ -+ params->hashRateLog = 7 - (cParams->strategy/3); -+ } -+ } - if (params->hashLog == 0) { -- params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); -- assert(params->hashLog <= ZSTD_HASHLOG_MAX); -+ params->hashLog = BOUNDED(ZSTD_HASHLOG_MIN, params->windowLog - params->hashRateLog, ZSTD_HASHLOG_MAX); - } -- if (params->hashRateLog == 0) { -- params->hashRateLog = params->windowLog < params->hashLog -- ? 0 -- : params->windowLog - params->hashLog; -+ if (params->minMatchLength == 0) { -+ params->minMatchLength = LDM_MIN_MATCH_LENGTH; -+ if (cParams->strategy >= ZSTD_btultra) -+ params->minMatchLength /= 2; -+ } -+ if (params->bucketSizeLog==0) { -+ assert(1 <= (int)cParams->strategy && (int)cParams->strategy <= 9); -+ params->bucketSizeLog = BOUNDED(LDM_BUCKET_SIZE_LOG, (U32)cParams->strategy, ZSTD_LDM_BUCKETSIZELOG_MAX); - } - params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); - } -@@ -170,22 +185,22 @@ size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t - /* ZSTD_ldm_getBucket() : - * Returns a pointer to the start of the bucket associated with hash. 
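The reworked ZSTD_ldm_adjustParameters() above derives defaults from the compression strategy. Assuming strategy values 1..9 (fast..btultra2), as the "[fast, rate7] to [btultra2, rate4]" comment implies, the derivation can be tabulated with a short standalone program; values marked "assumed" are not taken from this hunk:

/* Illustrative only: strategy-based LDM defaults.  The real code additionally
 * bounds hashLog to [ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX]. */
#include <stdio.h>

#define BOUNDED(lo, x, hi) ((x) < (lo) ? (lo) : (x) > (hi) ? (hi) : (x))

int main(void)
{
    const unsigned windowLog = 27;                 /* example window */
    for (unsigned strategy = 1; strategy <= 9; strategy++) {
        unsigned hashRateLog   = 7 - strategy / 3;       /* fast -> 7 ... btultra2 -> 4 */
        unsigned hashLog       = windowLog - hashRateLog; /* bounded in the real code */
        unsigned bucketSizeLog = BOUNDED(4u, strategy, 8u); /* LDM_BUCKET_SIZE_LOG=4; max assumed 8 */
        if (bucketSizeLog > hashLog) bucketSizeLog = hashLog;
        printf("strategy=%u  hashRateLog=%u  hashLog=%u  bucketSizeLog=%u\n",
               strategy, hashRateLog, hashLog, bucketSizeLog);
    }
    return 0;
}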
*/ - static ldmEntry_t* ZSTD_ldm_getBucket( -- ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams) -+ const ldmState_t* ldmState, size_t hash, U32 const bucketSizeLog) - { -- return ldmState->hashTable + (hash << ldmParams.bucketSizeLog); -+ return ldmState->hashTable + (hash << bucketSizeLog); - } - - /* ZSTD_ldm_insertEntry() : - * Insert the entry with corresponding hash into the hash table */ - static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, - size_t const hash, const ldmEntry_t entry, -- ldmParams_t const ldmParams) -+ U32 const bucketSizeLog) - { - BYTE* const pOffset = ldmState->bucketOffsets + hash; - unsigned const offset = *pOffset; - -- *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry; -- *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1)); -+ *(ZSTD_ldm_getBucket(ldmState, hash, bucketSizeLog) + offset) = entry; -+ *pOffset = (BYTE)((offset + 1) & ((1u << bucketSizeLog) - 1)); - - } - -@@ -234,7 +249,7 @@ static size_t ZSTD_ldm_countBackwardsMat - * - * The tables for the other strategies are filled within their - * block compressors. */ --static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, -+static size_t ZSTD_ldm_fillFastTables(ZSTD_MatchState_t* ms, - void const* end) - { - const BYTE* const iend = (const BYTE*)end; -@@ -242,11 +257,15 @@ static size_t ZSTD_ldm_fillFastTables(ZS - switch(ms->cParams.strategy) - { - case ZSTD_fast: -- ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast); -+ ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx); - break; - - case ZSTD_dfast: -- ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast); -+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR -+ ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx); -+#else -+ assert(0); /* shouldn't be called: cparams should've been adjusted. */ -+#endif - break; - - case ZSTD_greedy: -@@ -269,7 +288,8 @@ void ZSTD_ldm_fillHashTable( - const BYTE* iend, ldmParams_t const* params) - { - U32 const minMatchLength = params->minMatchLength; -- U32 const hBits = params->hashLog - params->bucketSizeLog; -+ U32 const bucketSizeLog = params->bucketSizeLog; -+ U32 const hBits = params->hashLog - bucketSizeLog; - BYTE const* const base = ldmState->window.base; - BYTE const* const istart = ip; - ldmRollingHashState_t hashState; -@@ -284,7 +304,7 @@ void ZSTD_ldm_fillHashTable( - unsigned n; - - numSplits = 0; -- hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits); -+ hashed = ZSTD_ldm_gear_feed(&hashState, ip, (size_t)(iend - ip), splits, &numSplits); - - for (n = 0; n < numSplits; n++) { - if (ip + splits[n] >= istart + minMatchLength) { -@@ -295,7 +315,7 @@ void ZSTD_ldm_fillHashTable( - - entry.offset = (U32)(split - base); - entry.checksum = (U32)(xxhash >> 32); -- ZSTD_ldm_insertEntry(ldmState, hash, entry, *params); -+ ZSTD_ldm_insertEntry(ldmState, hash, entry, params->bucketSizeLog); - } - } - -@@ -309,7 +329,7 @@ void ZSTD_ldm_fillHashTable( - * Sets cctx->nextToUpdate to a position corresponding closer to anchor - * if it is far way - * (after a long match, only update tables a limited amount). 
*/ --static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) -+static void ZSTD_ldm_limitTableUpdate(ZSTD_MatchState_t* ms, const BYTE* anchor) - { - U32 const curr = (U32)(anchor - ms->window.base); - if (curr > ms->nextToUpdate + 1024) { -@@ -318,8 +338,10 @@ static void ZSTD_ldm_limitTableUpdate(ZS - } - } - --static size_t ZSTD_ldm_generateSequences_internal( -- ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t ZSTD_ldm_generateSequences_internal( -+ ldmState_t* ldmState, RawSeqStore_t* rawSeqStore, - ldmParams_t const* params, void const* src, size_t srcSize) - { - /* LDM parameters */ -@@ -373,7 +395,7 @@ static size_t ZSTD_ldm_generateSequences - candidates[n].split = split; - candidates[n].hash = hash; - candidates[n].checksum = (U32)(xxhash >> 32); -- candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params); -+ candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, params->bucketSizeLog); - PREFETCH_L1(candidates[n].bucket); - } - -@@ -396,7 +418,7 @@ static size_t ZSTD_ldm_generateSequences - * the previous one, we merely register it in the hash table and - * move on */ - if (split < anchor) { -- ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); -+ ZSTD_ldm_insertEntry(ldmState, hash, newEntry, params->bucketSizeLog); - continue; - } - -@@ -443,7 +465,7 @@ static size_t ZSTD_ldm_generateSequences - /* No match found -- insert an entry into the hash table - * and process the next candidate match */ - if (bestEntry == NULL) { -- ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); -+ ZSTD_ldm_insertEntry(ldmState, hash, newEntry, params->bucketSizeLog); - continue; - } - -@@ -464,7 +486,7 @@ static size_t ZSTD_ldm_generateSequences - - /* Insert the current entry into the hash table --- it must be - * done after the previous block to avoid clobbering bestEntry */ -- ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); -+ ZSTD_ldm_insertEntry(ldmState, hash, newEntry, params->bucketSizeLog); - - anchor = split + forwardMatchLength; - -@@ -503,7 +525,7 @@ static void ZSTD_ldm_reduceTable(ldmEntr - } - - size_t ZSTD_ldm_generateSequences( -- ldmState_t* ldmState, rawSeqStore_t* sequences, -+ ldmState_t* ldmState, RawSeqStore_t* sequences, - ldmParams_t const* params, void const* src, size_t srcSize) - { - U32 const maxDist = 1U << params->windowLog; -@@ -549,7 +571,7 @@ size_t ZSTD_ldm_generateSequences( - * the window through early invalidation. - * TODO: * Test the chunk size. - * * Try invalidation after the sequence generation and test the -- * the offset against maxDist directly. -+ * offset against maxDist directly. - * - * NOTE: Because of dictionaries + sequence splitting we MUST make sure - * that any offset used is valid at the END of the sequence, since it may -@@ -580,7 +602,7 @@ size_t ZSTD_ldm_generateSequences( - } - - void --ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) -+ZSTD_ldm_skipSequences(RawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) - { - while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { - rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; -@@ -616,7 +638,7 @@ ZSTD_ldm_skipSequences(rawSeqStore_t* ra - * Returns the current sequence to handle, or if the rest of the block should - * be literals, it returns a sequence with offset == 0. 
- */ --static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, -+static rawSeq maybeSplitSequence(RawSeqStore_t* rawSeqStore, - U32 const remaining, U32 const minMatch) - { - rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; -@@ -640,7 +662,7 @@ static rawSeq maybeSplitSequence(rawSeqS - return sequence; - } - --void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { -+void ZSTD_ldm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes) { - U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); - while (currPos && rawSeqStore->pos < rawSeqStore->size) { - rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; -@@ -657,14 +679,14 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSe - } - } - --size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -- ZSTD_paramSwitch_e useRowMatchFinder, -+size_t ZSTD_ldm_blockCompress(RawSeqStore_t* rawSeqStore, -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_ParamSwitch_e useRowMatchFinder, - void const* src, size_t srcSize) - { - const ZSTD_compressionParameters* const cParams = &ms->cParams; - unsigned const minMatch = cParams->minMatch; -- ZSTD_blockCompressor const blockCompressor = -+ ZSTD_BlockCompressor_f const blockCompressor = - ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms)); - /* Input bounds */ - BYTE const* const istart = (BYTE const*)src; -@@ -689,7 +711,6 @@ size_t ZSTD_ldm_blockCompress(rawSeqStor - /* maybeSplitSequence updates rawSeqStore->pos */ - rawSeq const sequence = maybeSplitSequence(rawSeqStore, - (U32)(iend - ip), minMatch); -- int i; - /* End signal */ - if (sequence.offset == 0) - break; -@@ -702,6 +723,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStor - /* Run the block compressor */ - DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); - { -+ int i; - size_t const newLitLength = - blockCompressor(ms, seqStore, rep, ip, sequence.litLength); - ip += sequence.litLength; -@@ -711,7 +733,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStor - rep[0] = sequence.offset; - /* Store the sequence */ - ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, -- STORE_OFFSET(sequence.offset), -+ OFFSET_TO_OFFBASE(sequence.offset), - sequence.matchLength); - ip += sequence.matchLength; - } ---- a/lib/zstd/compress/zstd_ldm.h -+++ b/lib/zstd/compress/zstd_ldm.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -11,7 +12,6 @@ - #ifndef ZSTD_LDM_H - #define ZSTD_LDM_H - -- - #include "zstd_compress_internal.h" /* ldmParams_t, U32 */ - #include /* ZSTD_CCtx, size_t */ - -@@ -40,7 +40,7 @@ void ZSTD_ldm_fillHashTable( - * sequences. - */ - size_t ZSTD_ldm_generateSequences( -- ldmState_t* ldms, rawSeqStore_t* sequences, -+ ldmState_t* ldms, RawSeqStore_t* sequences, - ldmParams_t const* params, void const* src, size_t srcSize); - - /* -@@ -61,9 +61,9 @@ size_t ZSTD_ldm_generateSequences( - * two. We handle that case correctly, and update `rawSeqStore` appropriately. - * NOTE: This function does not return any errors. 
- */ --size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -- ZSTD_paramSwitch_e useRowMatchFinder, -+size_t ZSTD_ldm_blockCompress(RawSeqStore_t* rawSeqStore, -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_ParamSwitch_e useRowMatchFinder, - void const* src, size_t srcSize); - - /* -@@ -73,7 +73,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStor - * Avoids emitting matches less than `minMatch` bytes. - * Must be called for data that is not passed to ZSTD_ldm_blockCompress(). - */ --void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, -+void ZSTD_ldm_skipSequences(RawSeqStore_t* rawSeqStore, size_t srcSize, - U32 const minMatch); - - /* ZSTD_ldm_skipRawSeqStoreBytes(): -@@ -81,7 +81,7 @@ void ZSTD_ldm_skipSequences(rawSeqStore_ - * Not to be used in conjunction with ZSTD_ldm_skipSequences(). - * Must be called for data with is not passed to ZSTD_ldm_blockCompress(). - */ --void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes); -+void ZSTD_ldm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes); - - /* ZSTD_ldm_getTableSize() : - * Estimate the space needed for long distance matching tables or 0 if LDM is -@@ -107,5 +107,4 @@ size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t - void ZSTD_ldm_adjustParameters(ldmParams_t* params, - ZSTD_compressionParameters const* cParams); - -- - #endif /* ZSTD_FAST_H */ ---- a/lib/zstd/compress/zstd_ldm_geartab.h -+++ b/lib/zstd/compress/zstd_ldm_geartab.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the ---- a/lib/zstd/compress/zstd_opt.c -+++ b/lib/zstd/compress/zstd_opt.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -12,11 +13,14 @@ - #include "hist.h" - #include "zstd_opt.h" - -+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) - - #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ - #define ZSTD_MAX_PRICE (1<<30) - --#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ -+#define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ - - - /*-************************************* -@@ -26,27 +30,35 @@ - #if 0 /* approximation at bit level (for tests) */ - # define BITCOST_ACCURACY 0 - # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) --# define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat)) -+# define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat)) - #elif 0 /* fractional bit accuracy (for tests) */ - # define BITCOST_ACCURACY 8 - # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) --# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat)) -+# define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat)) - #else /* opt==approx, ultra==accurate */ - # define BITCOST_ACCURACY 8 - # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) --# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) -+# define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) - #endif - -+/* ZSTD_bitWeight() : -+ * provide estimated "cost" of a stat in full bits only */ - MEM_STATIC U32 ZSTD_bitWeight(U32 stat) - { - return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER); - } - -+/* ZSTD_fracWeight() : -+ * provide fractional-bit "cost" of a stat, -+ * using linear interpolation approximation */ - MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) - { - U32 const stat = rawStat + 1; - U32 const hb = ZSTD_highbit32(stat); - U32 const BWeight = hb * BITCOST_MULTIPLIER; -+ /* Fweight was meant for "Fractional weight" -+ * but it's effectively a value between 1 and 2 -+ * using fixed point arithmetic */ - U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb; - U32 const weight = BWeight + FWeight; - assert(hb + BITCOST_ACCURACY < 31); -@@ -57,7 +69,7 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawSt - /* debugging function, - * @return price in bytes as fractional value - * for debug messages only */ --MEM_STATIC double ZSTD_fCost(U32 price) -+MEM_STATIC double ZSTD_fCost(int price) - { - return (double)price / (BITCOST_MULTIPLIER*8); - } -@@ -88,20 +100,26 @@ static U32 sum_u32(const unsigned table[ - return total; - } - --static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift) -+typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e; -+ -+static U32 -+ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1) - { - U32 s, sum=0; -- DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift); -+ DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", -+ (unsigned)lastEltIndex+1, (unsigned)shift ); - assert(shift < 30); - for (s=0; s> shift); -- sum += table[s]; -+ unsigned const base = base1 ? 
1 : (table[s]>0); -+ unsigned const newStat = base + (table[s] >> shift); -+ sum += newStat; -+ table[s] = newStat; - } - return sum; - } - - /* ZSTD_scaleStats() : -- * reduce all elements in table is sum too large -+ * reduce all elt frequencies in table if sum too large - * return the resulting sum of elements */ - static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget) - { -@@ -110,7 +128,7 @@ static U32 ZSTD_scaleStats(unsigned* tab - DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget); - assert(logTarget < 30); - if (factor <= 1) return prevsum; -- return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor)); -+ return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed); - } - - /* ZSTD_rescaleFreqs() : -@@ -129,18 +147,22 @@ ZSTD_rescaleFreqs(optState_t* const optP - DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); - optPtr->priceType = zop_dynamic; - -- if (optPtr->litLengthSum == 0) { /* first block : init */ -- if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */ -- DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); -+ if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */ -+ -+ /* heuristic: use pre-defined stats for too small inputs */ -+ if (srcSize <= ZSTD_PREDEF_THRESHOLD) { -+ DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD); - optPtr->priceType = zop_predef; - } - - assert(optPtr->symbolCosts != NULL); - if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { -- /* huffman table presumed generated by dictionary */ -+ -+ /* huffman stats covering the full value set : table presumed generated by dictionary */ - optPtr->priceType = zop_dynamic; - - if (compressedLiterals) { -+ /* generate literals statistics from huffman table */ - unsigned lit; - assert(optPtr->litFreq != NULL); - optPtr->litSum = 0; -@@ -188,13 +210,14 @@ ZSTD_rescaleFreqs(optState_t* const optP - optPtr->offCodeSum += optPtr->offCodeFreq[of]; - } } - -- } else { /* not a dictionary */ -+ } else { /* first block, no dictionary */ - - assert(optPtr->litFreq != NULL); - if (compressedLiterals) { -+ /* base initial cost of literals on direct frequency within src */ - unsigned lit = MaxLit; - HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ -- optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8); -+ optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible); - } - - { unsigned const baseLLfreqs[MaxLL+1] = { -@@ -224,10 +247,9 @@ ZSTD_rescaleFreqs(optState_t* const optP - optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1); - } - -- - } - -- } else { /* new block : re-use previous statistics, scaled down */ -+ } else { /* new block : scale down accumulated statistics */ - - if (compressedLiterals) - optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12); -@@ -246,6 +268,7 @@ static U32 ZSTD_rawLiteralsCost(const BY - const optState_t* const optPtr, - int optLevel) - { -+ DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength); - if (litLength == 0) return 0; - - if (!ZSTD_compressedLiterals(optPtr)) -@@ -255,11 +278,14 @@ static U32 ZSTD_rawLiteralsCost(const BY - return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ - - /* dynamic statistics */ -- { U32 price = litLength * optPtr->litSumBasePrice; -+ { U32 price = optPtr->litSumBasePrice * litLength; -+ U32 const 
litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER; - U32 u; -+ assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER); - for (u=0; u < litLength; u++) { -- assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */ -- price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel); -+ U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel); -+ if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax; -+ price -= litPrice; - } - return price; - } -@@ -272,10 +298,11 @@ static U32 ZSTD_litLengthPrice(U32 const - assert(litLength <= ZSTD_BLOCKSIZE_MAX); - if (optPtr->priceType == zop_predef) - return WEIGHT(litLength, optLevel); -- /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX -- * because it isn't representable in the zstd format. So instead just -- * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block -- * would be all literals. -+ -+ /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX -+ * because it isn't representable in the zstd format. -+ * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. -+ * In such a case, the block would be all literals. - */ - if (litLength == ZSTD_BLOCKSIZE_MAX) - return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel); -@@ -289,24 +316,25 @@ static U32 ZSTD_litLengthPrice(U32 const - } - - /* ZSTD_getMatchPrice() : -- * Provides the cost of the match part (offset + matchLength) of a sequence -+ * Provides the cost of the match part (offset + matchLength) of a sequence. - * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. -- * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2 -+ * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq() - * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) - */ - FORCE_INLINE_TEMPLATE U32 --ZSTD_getMatchPrice(U32 const offcode, -+ZSTD_getMatchPrice(U32 const offBase, - U32 const matchLength, - const optState_t* const optPtr, - int const optLevel) - { - U32 price; -- U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode)); -+ U32 const offCode = ZSTD_highbit32(offBase); - U32 const mlBase = matchLength - MINMATCH; - assert(matchLength >= MINMATCH); - -- if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ -- return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER); -+ if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */ -+ return WEIGHT(mlBase, optLevel) -+ + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */ - - /* dynamic statistics */ - price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); -@@ -325,10 +353,10 @@ ZSTD_getMatchPrice(U32 const offcode, - } - - /* ZSTD_updateStats() : -- * assumption : literals + litLengtn <= iend */ -+ * assumption : literals + litLength <= iend */ - static void ZSTD_updateStats(optState_t* const optPtr, - U32 litLength, const BYTE* literals, -- U32 offsetCode, U32 matchLength) -+ U32 offBase, U32 matchLength) - { - /* literals */ - if (ZSTD_compressedLiterals(optPtr)) { -@@ -344,8 +372,8 @@ static void ZSTD_updateStats(optState_t* - optPtr->litLengthSum++; - } - -- /* offset code : expected to follow storeSeq() numeric representation */ -- { U32 const offCode = 
ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode)); -+ /* offset code : follows storeSeq() numeric representation */ -+ { U32 const offCode = ZSTD_highbit32(offBase); - assert(offCode <= MaxOff); - optPtr->offCodeFreq[offCode]++; - optPtr->offCodeSum++; -@@ -379,9 +407,11 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const v - - /* Update hashTable3 up to ip (excluded) - Assumption : always within prefix (i.e. not within extDict) */ --static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms, -- U32* nextToUpdate3, -- const BYTE* const ip) -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_MatchState_t* ms, -+ U32* nextToUpdate3, -+ const BYTE* const ip) - { - U32* const hashTable3 = ms->hashTable3; - U32 const hashLog3 = ms->hashLog3; -@@ -408,8 +438,10 @@ static U32 ZSTD_insertAndFindFirstIndexH - * @param ip assumed <= iend-8 . - * @param target The target of ZSTD_updateTree_internal() - we are filling to this position - * @return : nb of positions added */ --static U32 ZSTD_insertBt1( -- const ZSTD_matchState_t* ms, -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+U32 ZSTD_insertBt1( -+ const ZSTD_MatchState_t* ms, - const BYTE* const ip, const BYTE* const iend, - U32 const target, - U32 const mls, const int extDict) -@@ -527,15 +559,16 @@ static U32 ZSTD_insertBt1( - } - - FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - void ZSTD_updateTree_internal( -- ZSTD_matchState_t* ms, -+ ZSTD_MatchState_t* ms, - const BYTE* const ip, const BYTE* const iend, - const U32 mls, const ZSTD_dictMode_e dictMode) - { - const BYTE* const base = ms->window.base; - U32 const target = (U32)(ip - base); - U32 idx = ms->nextToUpdate; -- DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", -+ DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", - idx, target, dictMode); - - while(idx < target) { -@@ -548,20 +581,23 @@ void ZSTD_updateTree_internal( - ms->nextToUpdate = target; - } - --void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { -+void ZSTD_updateTree(ZSTD_MatchState_t* ms, const BYTE* ip, const BYTE* iend) { - ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); - } - - FORCE_INLINE_TEMPLATE --U32 ZSTD_insertBtAndGetAllMatches ( -- ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ -- ZSTD_matchState_t* ms, -- U32* nextToUpdate3, -- const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, -- const U32 rep[ZSTD_REP_NUM], -- U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ -- const U32 lengthToBeat, -- U32 const mls /* template */) -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+U32 -+ZSTD_insertBtAndGetAllMatches ( -+ ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ -+ ZSTD_MatchState_t* ms, -+ U32* nextToUpdate3, -+ const BYTE* const ip, const BYTE* const iLimit, -+ const ZSTD_dictMode_e dictMode, -+ const U32 rep[ZSTD_REP_NUM], -+ const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ -+ const U32 lengthToBeat, -+ const U32 mls /* template */) - { - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); -@@ -590,7 +626,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( - U32 mnum = 0; - U32 nbCompares = 1U << cParams->searchLog; - -- const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? 
ms->dictMatchState : NULL; -+ const ZSTD_MatchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL; - const ZSTD_compressionParameters* const dmsCParams = - dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL; - const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; -@@ -629,13 +665,13 @@ U32 ZSTD_insertBtAndGetAllMatches ( - assert(curr >= windowLow); - if ( dictMode == ZSTD_extDict - && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */ -- & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) -+ & (ZSTD_index_overlap_check(dictLimit, repIndex)) ) - && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { - repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; - } - if (dictMode == ZSTD_dictMatchState - && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */ -- & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */ -+ & (ZSTD_index_overlap_check(dictLimit, repIndex)) ) - && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { - repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch; - } } -@@ -644,7 +680,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( - DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", - repCode, ll0, repOffset, repLen); - bestLength = repLen; -- matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1); /* expect value between 1 and 3 */ -+ matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */ - matches[mnum].len = (U32)repLen; - mnum++; - if ( (repLen > sufficient_len) -@@ -673,7 +709,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( - bestLength = mlen; - assert(curr > matchIndex3); - assert(mnum==0); /* no prior solution */ -- matches[0].off = STORE_OFFSET(curr - matchIndex3); -+ matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3); - matches[0].len = (U32)mlen; - mnum = 1; - if ( (mlen > sufficient_len) | -@@ -706,13 +742,13 @@ U32 ZSTD_insertBtAndGetAllMatches ( - } - - if (matchLength > bestLength) { -- DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", -- (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex)); -+ DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)", -+ (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex)); - assert(matchEndIdx > matchIndex); - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - bestLength = matchLength; -- matches[mnum].off = STORE_OFFSET(curr - matchIndex); -+ matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex); - matches[mnum].len = (U32)matchLength; - mnum++; - if ( (matchLength > ZSTD_OPT_NUM) -@@ -754,12 +790,12 @@ U32 ZSTD_insertBtAndGetAllMatches ( - - if (matchLength > bestLength) { - matchIndex = dictMatchIndex + dmsIndexDelta; -- DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)", -- (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex)); -+ DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)", -+ (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex)); - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = 
matchIndex + (U32)matchLength; - bestLength = matchLength; -- matches[mnum].off = STORE_OFFSET(curr - matchIndex); -+ matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex); - matches[mnum].len = (U32)matchLength; - mnum++; - if ( (matchLength > ZSTD_OPT_NUM) -@@ -784,7 +820,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( - - typedef U32 (*ZSTD_getAllMatchesFn)( - ZSTD_match_t*, -- ZSTD_matchState_t*, -+ ZSTD_MatchState_t*, - U32*, - const BYTE*, - const BYTE*, -@@ -792,9 +828,11 @@ typedef U32 (*ZSTD_getAllMatchesFn)( - U32 const ll0, - U32 const lengthToBeat); - --FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal( -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+U32 ZSTD_btGetAllMatches_internal( - ZSTD_match_t* matches, -- ZSTD_matchState_t* ms, -+ ZSTD_MatchState_t* ms, - U32* nextToUpdate3, - const BYTE* ip, - const BYTE* const iHighLimit, -@@ -817,7 +855,7 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllM - #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \ - static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \ - ZSTD_match_t* matches, \ -- ZSTD_matchState_t* ms, \ -+ ZSTD_MatchState_t* ms, \ - U32* nextToUpdate3, \ - const BYTE* ip, \ - const BYTE* const iHighLimit, \ -@@ -849,7 +887,7 @@ GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchSta - } - - static ZSTD_getAllMatchesFn --ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode) -+ZSTD_selectBtGetAllMatches(ZSTD_MatchState_t const* ms, ZSTD_dictMode_e const dictMode) - { - ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = { - ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict), -@@ -868,7 +906,7 @@ ZSTD_selectBtGetAllMatches(ZSTD_matchSta - - /* Struct containing info needed to make decision about ldm inclusion */ - typedef struct { -- rawSeqStore_t seqStore; /* External match candidates store for this block */ -+ RawSeqStore_t seqStore; /* External match candidates store for this block */ - U32 startPosInBlock; /* Start position of the current match candidate */ - U32 endPosInBlock; /* End position of the current match candidate */ - U32 offset; /* Offset of the match candidate */ -@@ -878,7 +916,7 @@ typedef struct { - * Moves forward in @rawSeqStore by @nbBytes, - * which will update the fields 'pos' and 'posInSequence'. - */ --static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) -+static void ZSTD_optLdm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes) - { - U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); - while (currPos && rawSeqStore->pos < rawSeqStore->size) { -@@ -935,7 +973,7 @@ ZSTD_opt_getNextMatchAndUpdateSeqStore(Z - return; - } - -- /* Matches may be < MINMATCH by this process. In that case, we will reject them -+ /* Matches may be < minMatch by this process. In that case, we will reject them - when we are deciding whether or not to add the ldm */ - optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining; - optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining; -@@ -957,25 +995,26 @@ ZSTD_opt_getNextMatchAndUpdateSeqStore(Z - * into 'matches'. Maintains the correct ordering of 'matches'. 
- */ - static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, -- const ZSTD_optLdm_t* optLdm, U32 currPosInBlock) -+ const ZSTD_optLdm_t* optLdm, U32 currPosInBlock, -+ U32 minMatch) - { - U32 const posDiff = currPosInBlock - optLdm->startPosInBlock; -- /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */ -+ /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */ - U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff; - - /* Ensure that current block position is not outside of the match */ - if (currPosInBlock < optLdm->startPosInBlock - || currPosInBlock >= optLdm->endPosInBlock -- || candidateMatchLength < MINMATCH) { -+ || candidateMatchLength < minMatch) { - return; - } - - if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) { -- U32 const candidateOffCode = STORE_OFFSET(optLdm->offset); -- DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u", -- candidateOffCode, candidateMatchLength, currPosInBlock); -+ U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset); -+ DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u", -+ candidateOffBase, candidateMatchLength, currPosInBlock); - matches[*nbMatches].len = candidateMatchLength; -- matches[*nbMatches].off = candidateOffCode; -+ matches[*nbMatches].off = candidateOffBase; - (*nbMatches)++; - } - } -@@ -986,7 +1025,8 @@ static void ZSTD_optLdm_maybeAddMatch(ZS - static void - ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, - ZSTD_match_t* matches, U32* nbMatches, -- U32 currPosInBlock, U32 remainingBytes) -+ U32 currPosInBlock, U32 remainingBytes, -+ U32 minMatch) - { - if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { - return; -@@ -1003,7 +1043,7 @@ ZSTD_optLdm_processMatchCandidate(ZSTD_o - } - ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes); - } -- ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock); -+ ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock, minMatch); - } - - -@@ -1011,11 +1051,6 @@ ZSTD_optLdm_processMatchCandidate(ZSTD_o - * Optimal parser - *********************************/ - --static U32 ZSTD_totalLen(ZSTD_optimal_t sol) --{ -- return sol.litlen + sol.mlen; --} -- - #if 0 /* debug */ - - static void -@@ -1033,9 +1068,15 @@ listStats(const U32* table, int lastEltI - - #endif - --FORCE_INLINE_TEMPLATE size_t --ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, -- seqStore_t* seqStore, -+#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel) -+#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel) -+#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1)) -+ -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t -+ZSTD_compressBlock_opt_generic(ZSTD_MatchState_t* ms, -+ SeqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, - const int optLevel, -@@ -1059,9 +1100,11 @@ ZSTD_compressBlock_opt_generic(ZSTD_matc - - ZSTD_optimal_t* const opt = optStatePtr->priceTable; - ZSTD_match_t* const matches = optStatePtr->matchTable; -- ZSTD_optimal_t lastSequence; -+ ZSTD_optimal_t lastStretch; - ZSTD_optLdm_t optLdm; - -+ ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t)); -+ - optLdm.seqStore = 
ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore; - optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0; - ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip)); -@@ -1082,103 +1125,140 @@ ZSTD_compressBlock_opt_generic(ZSTD_matc - U32 const ll0 = !litlen; - U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch); - ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, -- (U32)(ip-istart), (U32)(iend - ip)); -- if (!nbMatches) { ip++; continue; } -+ (U32)(ip-istart), (U32)(iend-ip), -+ minMatch); -+ if (!nbMatches) { -+ DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart)); -+ ip++; -+ continue; -+ } -+ -+ /* Match found: let's store this solution, and eventually find more candidates. -+ * During this forward pass, @opt is used to store stretches, -+ * defined as "a match followed by N literals". -+ * Note how this is different from a Sequence, which is "N literals followed by a match". -+ * Storing stretches allows us to store different match predecessors -+ * for each literal position part of a literals run. */ - - /* initialize opt[0] */ -- { U32 i ; for (i=0; i immediate encoding */ - { U32 const maxML = matches[nbMatches-1].len; -- U32 const maxOffcode = matches[nbMatches-1].off; -- DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series", -- nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart)); -+ U32 const maxOffBase = matches[nbMatches-1].off; -+ DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series", -+ nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart)); - - if (maxML > sufficient_len) { -- lastSequence.litlen = litlen; -- lastSequence.mlen = maxML; -- lastSequence.off = maxOffcode; -- DEBUGLOG(6, "large match (%u>%u), immediate encoding", -+ lastStretch.litlen = 0; -+ lastStretch.mlen = maxML; -+ lastStretch.off = maxOffBase; -+ DEBUGLOG(6, "large match (%u>%u) => immediate encoding", - maxML, sufficient_len); - cur = 0; -- last_pos = ZSTD_totalLen(lastSequence); -+ last_pos = maxML; - goto _shortestPath; - } } - - /* set prices for first matches starting position == 0 */ - assert(opt[0].price >= 0); -- { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); -- U32 pos; -+ { U32 pos; - U32 matchNb; - for (pos = 1; pos < minMatch; pos++) { -- opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ -+ opt[pos].price = ZSTD_MAX_PRICE; -+ opt[pos].mlen = 0; -+ opt[pos].litlen = litlen + pos; - } - for (matchNb = 0; matchNb < nbMatches; matchNb++) { -- U32 const offcode = matches[matchNb].off; -+ U32 const offBase = matches[matchNb].off; - U32 const end = matches[matchNb].len; - for ( ; pos <= end ; pos++ ) { -- U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel); -- U32 const sequencePrice = literalsPrice + matchPrice; -+ int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel); -+ int const sequencePrice = opt[0].price + matchPrice; - DEBUGLOG(7, "rPos:%u => set initial price : %.2f", - pos, ZSTD_fCost(sequencePrice)); - opt[pos].mlen = pos; -- opt[pos].off = offcode; -- opt[pos].litlen = litlen; -- opt[pos].price = (int)sequencePrice; -- } } -+ opt[pos].off = offBase; -+ opt[pos].litlen = 0; /* end of match */ -+ opt[pos].price = sequencePrice + LL_PRICE(0); -+ } -+ } - last_pos = pos-1; -+ opt[pos].price = ZSTD_MAX_PRICE; - } - } - - /* check further positions */ - 
for (cur = 1; cur <= last_pos; cur++) { - const BYTE* const inr = ip + cur; -- assert(cur < ZSTD_OPT_NUM); -- DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur) -+ assert(cur <= ZSTD_OPT_NUM); -+ DEBUGLOG(7, "cPos:%i==rPos:%u", (int)(inr-istart), cur); - - /* Fix current position with one literal if cheaper */ -- { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1; -+ { U32 const litlen = opt[cur-1].litlen + 1; - int const price = opt[cur-1].price -- + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) -- + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) -- - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); -+ + LIT_PRICE(ip+cur-1) -+ + LL_INCPRICE(litlen); - assert(price < 1000000000); /* overflow check */ - if (price <= opt[cur].price) { -- DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", -- inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, -+ ZSTD_optimal_t const prevMatch = opt[cur]; -+ DEBUGLOG(7, "cPos:%i==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", -+ (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, - opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); -- opt[cur].mlen = 0; -- opt[cur].off = 0; -+ opt[cur] = opt[cur-1]; - opt[cur].litlen = litlen; - opt[cur].price = price; -+ if ( (optLevel >= 1) /* additional check only for higher modes */ -+ && (prevMatch.litlen == 0) /* replace a match */ -+ && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */ -+ && LIKELY(ip + cur < iend) -+ ) { -+ /* check next position, in case it would be cheaper */ -+ int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1); -+ int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1); -+ DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f", -+ cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals)); -+ if ( (with1literal < withMoreLiterals) -+ && (with1literal < opt[cur+1].price) ) { -+ /* update offset history - before it disappears */ -+ U32 const prev = cur - prevMatch.mlen; -+ Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0); -+ assert(cur >= prevMatch.mlen); -+ DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !", -+ ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals), -+ newReps.rep[0], newReps.rep[1], newReps.rep[2] ); -+ opt[cur+1] = prevMatch; /* mlen & offbase */ -+ ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(Repcodes_t)); -+ opt[cur+1].litlen = 1; -+ opt[cur+1].price = with1literal; -+ if (last_pos < cur+1) last_pos = cur+1; -+ } -+ } - } else { -- DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", -- inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), -- opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]); -+ DEBUGLOG(7, "cPos:%i==rPos:%u : literal would cost more (%.2f>%.2f)", -+ (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price)); - } - } - -- /* Set the repcodes of the current position. We must do it here -- * because we rely on the repcodes of the 2nd to last sequence being -- * correct to set the next chunks repcodes during the backward -- * traversal. -+ /* Offset history is not updated during match comparison. -+ * Do it here, now that the match is selected and confirmed. 
- */ -- ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); -+ ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(Repcodes_t)); - assert(cur >= opt[cur].mlen); -- if (opt[cur].mlen != 0) { -+ if (opt[cur].litlen == 0) { -+ /* just finished a match => alter offset history */ - U32 const prev = cur - opt[cur].mlen; -- repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); -- ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); -- } else { -- ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); -+ Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0); -+ ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(Repcodes_t)); - } - - /* last match must start at a minimum distance of 8 from oend */ -@@ -1188,38 +1268,37 @@ ZSTD_compressBlock_opt_generic(ZSTD_matc - - if ( (optLevel==0) /*static_test*/ - && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { -- DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1); -+ DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1); - continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ - } - - assert(opt[cur].price >= 0); -- { U32 const ll0 = (opt[cur].mlen != 0); -- U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; -- U32 const previousPrice = (U32)opt[cur].price; -- U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); -+ { U32 const ll0 = (opt[cur].litlen == 0); -+ int const previousPrice = opt[cur].price; -+ int const basePrice = previousPrice + LL_PRICE(0); - U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch); - U32 matchNb; - - ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, -- (U32)(inr-istart), (U32)(iend-inr)); -+ (U32)(inr-istart), (U32)(iend-inr), -+ minMatch); - - if (!nbMatches) { - DEBUGLOG(7, "rPos:%u : no match found", cur); - continue; - } - -- { U32 const maxML = matches[nbMatches-1].len; -- DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u", -- inr-istart, cur, nbMatches, maxML); -- -- if ( (maxML > sufficient_len) -- || (cur + maxML >= ZSTD_OPT_NUM) ) { -- lastSequence.mlen = maxML; -- lastSequence.off = matches[nbMatches-1].off; -- lastSequence.litlen = litlen; -- cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */ -- last_pos = cur + ZSTD_totalLen(lastSequence); -- if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */ -+ { U32 const longestML = matches[nbMatches-1].len; -+ DEBUGLOG(7, "cPos:%i==rPos:%u, found %u matches, of longest ML=%u", -+ (int)(inr-istart), cur, nbMatches, longestML); -+ -+ if ( (longestML > sufficient_len) -+ || (cur + longestML >= ZSTD_OPT_NUM) -+ || (ip + cur + longestML >= iend) ) { -+ lastStretch.mlen = longestML; -+ lastStretch.off = matches[nbMatches-1].off; -+ lastStretch.litlen = 0; -+ last_pos = cur + longestML; - goto _shortestPath; - } } - -@@ -1230,20 +1309,25 @@ ZSTD_compressBlock_opt_generic(ZSTD_matc - U32 const startML = (matchNb>0) ? 
matches[matchNb-1].len+1 : minMatch; - U32 mlen; - -- DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u", -- matchNb, matches[matchNb].off, lastML, litlen); -+ DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u", -+ matchNb, matches[matchNb].off, lastML, opt[cur].litlen); - - for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ - U32 const pos = cur + mlen; -- int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); -+ int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); - - if ((pos > last_pos) || (price < opt[pos].price)) { - DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", - pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); -- while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ -+ while (last_pos < pos) { -+ /* fill empty positions, for future comparisons */ -+ last_pos++; -+ opt[last_pos].price = ZSTD_MAX_PRICE; -+ opt[last_pos].litlen = !0; /* just needs to be != 0, to mean "not an end of match" */ -+ } - opt[pos].mlen = mlen; - opt[pos].off = offset; -- opt[pos].litlen = litlen; -+ opt[pos].litlen = 0; - opt[pos].price = price; - } else { - DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", -@@ -1251,55 +1335,89 @@ ZSTD_compressBlock_opt_generic(ZSTD_matc - if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ - } - } } } -+ opt[last_pos+1].price = ZSTD_MAX_PRICE; - } /* for (cur = 1; cur <= last_pos; cur++) */ - -- lastSequence = opt[last_pos]; -- cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */ -- assert(cur < ZSTD_OPT_NUM); /* control overflow*/ -+ lastStretch = opt[last_pos]; -+ assert(cur >= lastStretch.mlen); -+ cur = last_pos - lastStretch.mlen; - - _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ - assert(opt[0].mlen == 0); -+ assert(last_pos >= lastStretch.mlen); -+ assert(cur == last_pos - lastStretch.mlen); - -- /* Set the next chunk's repcodes based on the repcodes of the beginning -- * of the last match, and the last sequence. This avoids us having to -- * update them while traversing the sequences. -- */ -- if (lastSequence.mlen != 0) { -- repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); -- ZSTD_memcpy(rep, &reps, sizeof(reps)); -+ if (lastStretch.mlen==0) { -+ /* no solution : all matches have been converted into literals */ -+ assert(lastStretch.litlen == (ip - anchor) + last_pos); -+ ip += last_pos; -+ continue; -+ } -+ assert(lastStretch.off > 0); -+ -+ /* Update offset history */ -+ if (lastStretch.litlen == 0) { -+ /* finishing on a match : update offset history */ -+ Repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0); -+ ZSTD_memcpy(rep, &reps, sizeof(Repcodes_t)); - } else { -- ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); -+ ZSTD_memcpy(rep, lastStretch.rep, sizeof(Repcodes_t)); -+ assert(cur >= lastStretch.litlen); -+ cur -= lastStretch.litlen; - } - -- { U32 const storeEnd = cur + 1; -+ /* Let's write the shortest path solution. -+ * It is stored in @opt in reverse order, -+ * starting from @storeEnd (==cur+2), -+ * effectively partially @opt overwriting. 
-+ * Content is changed too: -+ * - So far, @opt stored stretches, aka a match followed by literals -+ * - Now, it will store sequences, aka literals followed by a match -+ */ -+ { U32 const storeEnd = cur + 2; - U32 storeStart = storeEnd; -- U32 seqPos = cur; -+ U32 stretchPos = cur; - - DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", - last_pos, cur); (void)last_pos; -- assert(storeEnd < ZSTD_OPT_NUM); -- DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", -- storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off); -- opt[storeEnd] = lastSequence; -- while (seqPos > 0) { -- U32 const backDist = ZSTD_totalLen(opt[seqPos]); -+ assert(storeEnd < ZSTD_OPT_SIZE); -+ DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", -+ storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off); -+ if (lastStretch.litlen > 0) { -+ /* last "sequence" is unfinished: just a bunch of literals */ -+ opt[storeEnd].litlen = lastStretch.litlen; -+ opt[storeEnd].mlen = 0; -+ storeStart = storeEnd-1; -+ opt[storeStart] = lastStretch; -+ } { -+ opt[storeEnd] = lastStretch; /* note: litlen will be fixed */ -+ storeStart = storeEnd; -+ } -+ while (1) { -+ ZSTD_optimal_t nextStretch = opt[stretchPos]; -+ opt[storeStart].litlen = nextStretch.litlen; -+ DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)", -+ opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off); -+ if (nextStretch.mlen == 0) { -+ /* reaching beginning of segment */ -+ break; -+ } - storeStart--; -- DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", -- seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off); -- opt[storeStart] = opt[seqPos]; -- seqPos = (seqPos > backDist) ? seqPos - backDist : 0; -+ opt[storeStart] = nextStretch; /* note: litlen will be fixed */ -+ assert(nextStretch.litlen + nextStretch.mlen <= stretchPos); -+ stretchPos -= nextStretch.litlen + nextStretch.mlen; - } - - /* save sequences */ -- DEBUGLOG(6, "sending selected sequences into seqStore") -+ DEBUGLOG(6, "sending selected sequences into seqStore"); - { U32 storePos; - for (storePos=storeStart; storePos <= storeEnd; storePos++) { - U32 const llen = opt[storePos].litlen; - U32 const mlen = opt[storePos].mlen; -- U32 const offCode = opt[storePos].off; -+ U32 const offBase = opt[storePos].off; - U32 const advance = llen + mlen; -- DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", -- anchor - istart, (unsigned)llen, (unsigned)mlen); -+ DEBUGLOG(6, "considering seq starting at %i, llen=%u, mlen=%u", -+ (int)(anchor - istart), (unsigned)llen, (unsigned)mlen); - - if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ - assert(storePos == storeEnd); /* must be last sequence */ -@@ -1308,11 +1426,14 @@ _shortestPath: /* cur, last_pos, best_ - } - - assert(anchor + llen <= iend); -- ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); -- ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen); -+ ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen); -+ ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen); - anchor += advance; - ip = anchor; - } } -+ DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]); -+ -+ /* update all costs */ - ZSTD_setBasePrices(optStatePtr, optLevel); - } - } /* while (ip < ilimit) */ -@@ -1320,42 +1441,51 @@ _shortestPath: /* cur, last_pos, best_ - /* Return the last literals size */ - return (size_t)(iend - anchor); - } 
-+#endif /* build exclusions */ - -+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR - static size_t ZSTD_compressBlock_opt0( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) - { - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode); - } -+#endif - -+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR - static size_t ZSTD_compressBlock_opt2( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) - { - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode); - } -+#endif - -+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR - size_t ZSTD_compressBlock_btopt( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) - { - DEBUGLOG(5, "ZSTD_compressBlock_btopt"); - return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict); - } -+#endif - - - - -+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR - /* ZSTD_initStats_ultra(): - * make a first compression pass, just to seed stats with more accurate starting values. - * only works on first block, with no dictionary and no ldm. -- * this function cannot error, hence its contract must be respected. -+ * this function cannot error out, its narrow contract must be respected. - */ --static void --ZSTD_initStats_ultra(ZSTD_matchState_t* ms, -- seqStore_t* seqStore, -- U32 rep[ZSTD_REP_NUM], -- const void* src, size_t srcSize) -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_initStats_ultra(ZSTD_MatchState_t* ms, -+ SeqStore_t* seqStore, -+ U32 rep[ZSTD_REP_NUM], -+ const void* src, size_t srcSize) - { - U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ - ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep)); -@@ -1368,7 +1498,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* - - ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/ - -- /* invalidate first scan from history */ -+ /* invalidate first scan from history, only keep entropy stats */ - ZSTD_resetSeqStore(seqStore); - ms->window.base -= srcSize; - ms->window.dictLimit += (U32)srcSize; -@@ -1378,7 +1508,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* - } - - size_t ZSTD_compressBlock_btultra( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) - { - DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); -@@ -1386,16 +1516,16 @@ size_t ZSTD_compressBlock_btultra( - } - - size_t ZSTD_compressBlock_btultra2( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) - { - U32 const curr = (U32)((const BYTE*)src - ms->window.base); - DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); - -- /* 2-pass strategy: -+ /* 2-passes strategy: - * this strategy makes a first pass over first block to collect statistics -- * and seed next round's statistics with it. -- * After 1st pass, function forgets everything, and starts a new block. 
-+ * in order to seed next round's statistics with it. -+ * After 1st pass, function forgets history, and starts a new block. - * Consequently, this can only work if no data has been previously loaded in tables, - * aka, no dictionary, no prefix, no ldm preprocessing. - * The compression ratio gain is generally small (~0.5% on first block), -@@ -1404,42 +1534,47 @@ size_t ZSTD_compressBlock_btultra2( - if ( (ms->opt.litLengthSum==0) /* first block */ - && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ - && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ -- && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ -- && (srcSize > ZSTD_PREDEF_THRESHOLD) -+ && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ -+ && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */ - ) { - ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); - } - - return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); - } -+#endif - -+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR - size_t ZSTD_compressBlock_btopt_dictMatchState( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) - { - return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); - } - --size_t ZSTD_compressBlock_btultra_dictMatchState( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_btopt_extDict( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) - { -- return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); -+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict); - } -+#endif - --size_t ZSTD_compressBlock_btopt_extDict( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_btultra_dictMatchState( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) - { -- return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict); -+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); - } - - size_t ZSTD_compressBlock_btultra_extDict( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) - { - return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict); - } -+#endif - - /* note : no btultra2 variant for extDict nor dictMatchState, - * because btultra2 is not meant to work with dictionaries ---- a/lib/zstd/compress/zstd_opt.h -+++ b/lib/zstd/compress/zstd_opt.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -11,40 +12,62 @@ - #ifndef ZSTD_OPT_H - #define ZSTD_OPT_H - -- - #include "zstd_compress_internal.h" - -+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) - /* used in ZSTD_loadDictionaryContent() */ --void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); -+void ZSTD_updateTree(ZSTD_MatchState_t* ms, const BYTE* ip, const BYTE* iend); -+#endif - -+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR - size_t ZSTD_compressBlock_btopt( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_btultra( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_btopt_dictMatchState( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_btultra2( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+size_t ZSTD_compressBlock_btopt_extDict( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -+#define ZSTD_COMPRESSBLOCK_BTOPT ZSTD_compressBlock_btopt -+#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE ZSTD_compressBlock_btopt_dictMatchState -+#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT ZSTD_compressBlock_btopt_extDict -+#else -+#define ZSTD_COMPRESSBLOCK_BTOPT NULL -+#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE NULL -+#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT NULL -+#endif - --size_t ZSTD_compressBlock_btopt_dictMatchState( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_btultra( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - size_t ZSTD_compressBlock_btultra_dictMatchState( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -- void const* src, size_t srcSize); -- --size_t ZSTD_compressBlock_btopt_extDict( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - size_t ZSTD_compressBlock_btultra_extDict( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - - /* note : no btultra2 variant for extDict nor dictMatchState, - * because btultra2 is not meant to work with dictionaries - * and is only specific for the first block (no prefix) */ -+size_t ZSTD_compressBlock_btultra2( -+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ void const* src, size_t srcSize); - -+#define ZSTD_COMPRESSBLOCK_BTULTRA ZSTD_compressBlock_btultra -+#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE ZSTD_compressBlock_btultra_dictMatchState -+#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT ZSTD_compressBlock_btultra_extDict -+#define ZSTD_COMPRESSBLOCK_BTULTRA2 ZSTD_compressBlock_btultra2 -+#else -+#define ZSTD_COMPRESSBLOCK_BTULTRA NULL -+#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE NULL -+#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT NULL -+#define ZSTD_COMPRESSBLOCK_BTULTRA2 NULL -+#endif - - #endif /* ZSTD_OPT_H */ ---- 
/dev/null -+++ b/lib/zstd/compress/zstd_preSplit.c -@@ -0,0 +1,239 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause -+/* -+ * Copyright (c) Meta Platforms, Inc. and affiliates. -+ * All rights reserved. -+ * -+ * This source code is licensed under both the BSD-style license (found in the -+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found -+ * in the COPYING file in the root directory of this source tree). -+ * You may select, at your option, one of the above-listed licenses. -+ */ -+ -+#include "../common/compiler.h" /* ZSTD_ALIGNOF */ -+#include "../common/mem.h" /* S64 */ -+#include "../common/zstd_deps.h" /* ZSTD_memset */ -+#include "../common/zstd_internal.h" /* ZSTD_STATIC_ASSERT */ -+#include "hist.h" /* HIST_add */ -+#include "zstd_preSplit.h" -+ -+ -+#define BLOCKSIZE_MIN 3500 -+#define THRESHOLD_PENALTY_RATE 16 -+#define THRESHOLD_BASE (THRESHOLD_PENALTY_RATE - 2) -+#define THRESHOLD_PENALTY 3 -+ -+#define HASHLENGTH 2 -+#define HASHLOG_MAX 10 -+#define HASHTABLESIZE (1 << HASHLOG_MAX) -+#define HASHMASK (HASHTABLESIZE - 1) -+#define KNUTH 0x9e3779b9 -+ -+/* for hashLog > 8, hash 2 bytes. -+ * for hashLog == 8, just take the byte, no hashing. -+ * The speed of this method relies on compile-time constant propagation */ -+FORCE_INLINE_TEMPLATE unsigned hash2(const void *p, unsigned hashLog) -+{ -+ assert(hashLog >= 8); -+ if (hashLog == 8) return (U32)((const BYTE*)p)[0]; -+ assert(hashLog <= HASHLOG_MAX); -+ return (U32)(MEM_read16(p)) * KNUTH >> (32 - hashLog); -+} -+ -+ -+typedef struct { -+ unsigned events[HASHTABLESIZE]; -+ size_t nbEvents; -+} Fingerprint; -+typedef struct { -+ Fingerprint pastEvents; -+ Fingerprint newEvents; -+} FPStats; -+ -+static void initStats(FPStats* fpstats) -+{ -+ ZSTD_memset(fpstats, 0, sizeof(FPStats)); -+} -+ -+FORCE_INLINE_TEMPLATE void -+addEvents_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog) -+{ -+ const char* p = (const char*)src; -+ size_t limit = srcSize - HASHLENGTH + 1; -+ size_t n; -+ assert(srcSize >= HASHLENGTH); -+ for (n = 0; n < limit; n+=samplingRate) { -+ fp->events[hash2(p+n, hashLog)]++; -+ } -+ fp->nbEvents += limit/samplingRate; -+} -+ -+FORCE_INLINE_TEMPLATE void -+recordFingerprint_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog) -+{ -+ ZSTD_memset(fp, 0, sizeof(unsigned) * ((size_t)1 << hashLog)); -+ fp->nbEvents = 0; -+ addEvents_generic(fp, src, srcSize, samplingRate, hashLog); -+} -+ -+typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize); -+ -+#define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate -+ -+#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize) \ -+ static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \ -+ { \ -+ recordFingerprint_generic(fp, src, srcSize, _rate, _hSize); \ -+ } -+ -+ZSTD_GEN_RECORD_FINGERPRINT(1, 10) -+ZSTD_GEN_RECORD_FINGERPRINT(5, 10) -+ZSTD_GEN_RECORD_FINGERPRINT(11, 9) -+ZSTD_GEN_RECORD_FINGERPRINT(43, 8) -+ -+ -+static U64 abs64(S64 s64) { return (U64)((s64 < 0) ? 
-s64 : s64); } -+ -+static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2, unsigned hashLog) -+{ -+ U64 distance = 0; -+ size_t n; -+ assert(hashLog <= HASHLOG_MAX); -+ for (n = 0; n < ((size_t)1 << hashLog); n++) { -+ distance += -+ abs64((S64)fp1->events[n] * (S64)fp2->nbEvents - (S64)fp2->events[n] * (S64)fp1->nbEvents); -+ } -+ return distance; -+} -+ -+/* Compare newEvents with pastEvents -+ * return 1 when considered "too different" -+ */ -+static int compareFingerprints(const Fingerprint* ref, -+ const Fingerprint* newfp, -+ int penalty, -+ unsigned hashLog) -+{ -+ assert(ref->nbEvents > 0); -+ assert(newfp->nbEvents > 0); -+ { U64 p50 = (U64)ref->nbEvents * (U64)newfp->nbEvents; -+ U64 deviation = fpDistance(ref, newfp, hashLog); -+ U64 threshold = p50 * (U64)(THRESHOLD_BASE + penalty) / THRESHOLD_PENALTY_RATE; -+ return deviation >= threshold; -+ } -+} -+ -+static void mergeEvents(Fingerprint* acc, const Fingerprint* newfp) -+{ -+ size_t n; -+ for (n = 0; n < HASHTABLESIZE; n++) { -+ acc->events[n] += newfp->events[n]; -+ } -+ acc->nbEvents += newfp->nbEvents; -+} -+ -+static void flushEvents(FPStats* fpstats) -+{ -+ size_t n; -+ for (n = 0; n < HASHTABLESIZE; n++) { -+ fpstats->pastEvents.events[n] = fpstats->newEvents.events[n]; -+ } -+ fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents; -+ ZSTD_memset(&fpstats->newEvents, 0, sizeof(fpstats->newEvents)); -+} -+ -+static void removeEvents(Fingerprint* acc, const Fingerprint* slice) -+{ -+ size_t n; -+ for (n = 0; n < HASHTABLESIZE; n++) { -+ assert(acc->events[n] >= slice->events[n]); -+ acc->events[n] -= slice->events[n]; -+ } -+ acc->nbEvents -= slice->nbEvents; -+} -+ -+#define CHUNKSIZE (8 << 10) -+static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize, -+ int level, -+ void* workspace, size_t wkspSize) -+{ -+ static const RecordEvents_f records_fs[] = { -+ FP_RECORD(43), FP_RECORD(11), FP_RECORD(5), FP_RECORD(1) -+ }; -+ static const unsigned hashParams[] = { 8, 9, 10, 10 }; -+ const RecordEvents_f record_f = (assert(0<=level && level<=3), records_fs[level]); -+ FPStats* const fpstats = (FPStats*)workspace; -+ const char* p = (const char*)blockStart; -+ int penalty = THRESHOLD_PENALTY; -+ size_t pos = 0; -+ assert(blockSize == (128 << 10)); -+ assert(workspace != NULL); -+ assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0); -+ ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats)); -+ assert(wkspSize >= sizeof(FPStats)); (void)wkspSize; -+ -+ initStats(fpstats); -+ record_f(&fpstats->pastEvents, p, CHUNKSIZE); -+ for (pos = CHUNKSIZE; pos <= blockSize - CHUNKSIZE; pos += CHUNKSIZE) { -+ record_f(&fpstats->newEvents, p + pos, CHUNKSIZE); -+ if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty, hashParams[level])) { -+ return pos; -+ } else { -+ mergeEvents(&fpstats->pastEvents, &fpstats->newEvents); -+ if (penalty > 0) penalty--; -+ } -+ } -+ assert(pos == blockSize); -+ return blockSize; -+ (void)flushEvents; (void)removeEvents; -+} -+ -+/* ZSTD_splitBlock_fromBorders(): very fast strategy : -+ * compare fingerprint from beginning and end of the block, -+ * derive from their difference if it's preferable to split in the middle, -+ * repeat the process a second time, for finer grained decision. -+ * 3 times did not brought improvements, so I stopped at 2. -+ * Benefits are good enough for a cheap heuristic. -+ * More accurate splitting saves more, but speed impact is also more perceptible. 
-+ * For better accuracy, use more elaborate variant *_byChunks.
-+ */
-+static size_t ZSTD_splitBlock_fromBorders(const void* blockStart, size_t blockSize,
-+ void* workspace, size_t wkspSize)
-+{
-+#define SEGMENT_SIZE 512
-+ FPStats* const fpstats = (FPStats*)workspace;
-+ Fingerprint* middleEvents = (Fingerprint*)(void*)((char*)workspace + 512 * sizeof(unsigned));
-+ assert(blockSize == (128 << 10));
-+ assert(workspace != NULL);
-+ assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
-+ ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
-+ assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;
-+
-+ initStats(fpstats);
-+ HIST_add(fpstats->pastEvents.events, blockStart, SEGMENT_SIZE);
-+ HIST_add(fpstats->newEvents.events, (const char*)blockStart + blockSize - SEGMENT_SIZE, SEGMENT_SIZE);
-+ fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents = SEGMENT_SIZE;
-+ if (!compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, 0, 8))
-+ return blockSize;
-+
-+ HIST_add(middleEvents->events, (const char*)blockStart + blockSize/2 - SEGMENT_SIZE/2, SEGMENT_SIZE);
-+ middleEvents->nbEvents = SEGMENT_SIZE;
-+ { U64 const distFromBegin = fpDistance(&fpstats->pastEvents, middleEvents, 8);
-+ U64 const distFromEnd = fpDistance(&fpstats->newEvents, middleEvents, 8);
-+ U64 const minDistance = SEGMENT_SIZE * SEGMENT_SIZE / 3;
-+ if (abs64((S64)distFromBegin - (S64)distFromEnd) < minDistance)
-+ return 64 KB;
-+ return (distFromBegin > distFromEnd) ? 32 KB : 96 KB;
-+ }
-+}
-+
-+size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
-+ int level,
-+ void* workspace, size_t wkspSize)
-+{
-+ DEBUGLOG(6, "ZSTD_splitBlock (level=%i)", level);
-+ assert(0<=level && level<=4);
-+ if (level == 0)
-+ return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize);
-+ /* level >= 1*/
-+ return ZSTD_splitBlock_byChunks(blockStart, blockSize, level-1, workspace, wkspSize);
-+}
---- /dev/null
-+++ b/lib/zstd/compress/zstd_preSplit.h
-@@ -0,0 +1,34 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
-+/*
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-+ * All rights reserved.
-+ *
-+ * This source code is licensed under both the BSD-style license (found in the
-+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
-+ * in the COPYING file in the root directory of this source tree).
-+ * You may select, at your option, one of the above-listed licenses.
-+ */
-+
-+#ifndef ZSTD_PRESPLIT_H
-+#define ZSTD_PRESPLIT_H
-+
-+#include <linux/types.h> /* size_t */
-+
-+#define ZSTD_SLIPBLOCK_WORKSPACESIZE 8208
-+
-+/* ZSTD_splitBlock():
-+ * @level must be a value between 0 and 4.
-+ * higher levels spend more energy to detect block boundaries.
-+ * @workspace must be aligned for size_t.
-+ * @wkspSize must be at least >= ZSTD_SLIPBLOCK_WORKSPACESIZE
-+ * note:
-+ * For the time being, this function only accepts full 128 KB blocks.
-+ * Therefore, @blockSize must be == 128 KB.
-+ * While this could be extended to smaller sizes in the future,
-+ * it is not yet clear if this would be useful. TBD.
-+ */ -+size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize, -+ int level, -+ void* workspace, size_t wkspSize); -+ -+#endif /* ZSTD_PRESPLIT_H */ ---- a/lib/zstd/decompress/huf_decompress.c -+++ b/lib/zstd/decompress/huf_decompress.c -@@ -1,7 +1,8 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* ****************************************************************** - * huff0 huffman decoder, - * part of Finite State Entropy library -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * You can contact the author at : - * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy -@@ -19,10 +20,10 @@ - #include "../common/compiler.h" - #include "../common/bitstream.h" /* BIT_* */ - #include "../common/fse.h" /* to compress headers */ --#define HUF_STATIC_LINKING_ONLY - #include "../common/huf.h" - #include "../common/error_private.h" - #include "../common/zstd_internal.h" -+#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros64 */ - - /* ************************************************************** - * Constants -@@ -34,6 +35,12 @@ - * Macros - ****************************************************************/ - -+#ifdef HUF_DISABLE_FAST_DECODE -+# define HUF_ENABLE_FAST_DECODE 0 -+#else -+# define HUF_ENABLE_FAST_DECODE 1 -+#endif -+ - /* These two optional macros force the use one way or another of the two - * Huffman decompression implementations. You can't force in both directions - * at the same time. -@@ -43,27 +50,25 @@ - #error "Cannot force the use of the X1 and X2 decoders at the same time!" - #endif - --#if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2 --# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE -+/* When DYNAMIC_BMI2 is enabled, fast decoders are only called when bmi2 is -+ * supported at runtime, so we can add the BMI2 target attribute. -+ * When it is disabled, we will still get BMI2 if it is enabled statically. 
-+ */ -+#if DYNAMIC_BMI2 -+# define HUF_FAST_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE - #else --# define HUF_ASM_X86_64_BMI2_ATTRS -+# define HUF_FAST_BMI2_ATTRS - #endif - - #define HUF_EXTERN_C - #define HUF_ASM_DECL HUF_EXTERN_C - --#if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)) -+#if DYNAMIC_BMI2 - # define HUF_NEED_BMI2_FUNCTION 1 - #else - # define HUF_NEED_BMI2_FUNCTION 0 - #endif - --#if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)) --# define HUF_NEED_DEFAULT_FUNCTION 1 --#else --# define HUF_NEED_DEFAULT_FUNCTION 0 --#endif -- - /* ************************************************************** - * Error Management - ****************************************************************/ -@@ -80,6 +85,11 @@ - /* ************************************************************** - * BMI2 Variant Wrappers - ****************************************************************/ -+typedef size_t (*HUF_DecompressUsingDTableFn)(void *dst, size_t dstSize, -+ const void *cSrc, -+ size_t cSrcSize, -+ const HUF_DTable *DTable); -+ - #if DYNAMIC_BMI2 - - #define HUF_DGEN(fn) \ -@@ -101,9 +111,9 @@ - } \ - \ - static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ -- size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ -+ size_t cSrcSize, HUF_DTable const* DTable, int flags) \ - { \ -- if (bmi2) { \ -+ if (flags & HUF_flags_bmi2) { \ - return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ -@@ -113,9 +123,9 @@ - - #define HUF_DGEN(fn) \ - static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ -- size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ -+ size_t cSrcSize, HUF_DTable const* DTable, int flags) \ - { \ -- (void)bmi2; \ -+ (void)flags; \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } - -@@ -134,43 +144,66 @@ static DTableDesc HUF_getDTableDesc(cons - return dtd; - } - --#if ZSTD_ENABLE_ASM_X86_64_BMI2 -- --static size_t HUF_initDStream(BYTE const* ip) { -+static size_t HUF_initFastDStream(BYTE const* ip) { - BYTE const lastByte = ip[7]; -- size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; -+ size_t const bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; - size_t const value = MEM_readLEST(ip) | 1; - assert(bitsConsumed <= 8); -+ assert(sizeof(size_t) == 8); - return value << bitsConsumed; - } -+ -+ -+/* -+ * The input/output arguments to the Huffman fast decoding loop: -+ * -+ * ip [in/out] - The input pointers, must be updated to reflect what is consumed. -+ * op [in/out] - The output pointers, must be updated to reflect what is written. -+ * bits [in/out] - The bitstream containers, must be updated to reflect the current state. -+ * dt [in] - The decoding table. -+ * ilowest [in] - The beginning of the valid range of the input. Decoders may read -+ * down to this pointer. It may be below iend[0]. -+ * oend [in] - The end of the output stream. op[3] must not cross oend. -+ * iend [in] - The end of each input stream. ip[i] may cross iend[i], -+ * as long as it is above ilowest, but that indicates corruption. -+ */ - typedef struct { - BYTE const* ip[4]; - BYTE* op[4]; - U64 bits[4]; - void const* dt; -- BYTE const* ilimit; -+ BYTE const* ilowest; - BYTE* oend; - BYTE const* iend[4]; --} HUF_DecompressAsmArgs; -+} HUF_DecompressFastArgs; -+ -+typedef void (*HUF_DecompressFastLoopFn)(HUF_DecompressFastArgs*); - - /* -- * Initializes args for the asm decoding loop. 
-- * @returns 0 on success -- * 1 if the fallback implementation should be used. -+ * Initializes args for the fast decoding loop. -+ * @returns 1 on success -+ * 0 if the fallback implementation should be used. - * Or an error code on failure. - */ --static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable) -+static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable) - { - void const* dt = DTable + 1; - U32 const dtLog = HUF_getDTableDesc(DTable).tableLog; - -- const BYTE* const ilimit = (const BYTE*)src + 6 + 8; -+ const BYTE* const istart = (const BYTE*)src; - -- BYTE* const oend = (BYTE*)dst + dstSize; -+ BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize); - -- /* The following condition is false on x32 platform, -- * but HUF_asm is not compatible with this ABI */ -- if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1; -+ /* The fast decoding loop assumes 64-bit little-endian. -+ * This condition is false on x32. -+ */ -+ if (!MEM_isLittleEndian() || MEM_32bits()) -+ return 0; -+ -+ /* Avoid nullptr addition */ -+ if (dstSize == 0) -+ return 0; -+ assert(dst != NULL); - - /* strict minimum : jump table + 1 byte per stream */ - if (srcSize < 10) -@@ -181,11 +214,10 @@ static size_t HUF_DecompressAsmArgs_init - * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder. - */ - if (dtLog != HUF_DECODER_FAST_TABLELOG) -- return 1; -+ return 0; - - /* Read the jump table. */ - { -- const BYTE* const istart = (const BYTE*)src; - size_t const length1 = MEM_readLE16(istart); - size_t const length2 = MEM_readLE16(istart+2); - size_t const length3 = MEM_readLE16(istart+4); -@@ -195,13 +227,11 @@ static size_t HUF_DecompressAsmArgs_init - args->iend[2] = args->iend[1] + length2; - args->iend[3] = args->iend[2] + length3; - -- /* HUF_initDStream() requires this, and this small of an input -+ /* HUF_initFastDStream() requires this, and this small of an input - * won't benefit from the ASM loop anyways. -- * length1 must be >= 16 so that ip[0] >= ilimit before the loop -- * starts. - */ -- if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8) -- return 1; -+ if (length1 < 8 || length2 < 8 || length3 < 8 || length4 < 8) -+ return 0; - if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */ - } - /* ip[] contains the position that is currently loaded into bits[]. */ -@@ -218,7 +248,7 @@ static size_t HUF_DecompressAsmArgs_init - - /* No point to call the ASM loop for tiny outputs. */ - if (args->op[3] >= oend) -- return 1; -+ return 0; - - /* bits[] is the bit container. - * It is read from the MSB down to the LSB. -@@ -227,24 +257,25 @@ static size_t HUF_DecompressAsmArgs_init - * set, so that CountTrailingZeros(bits[]) can be used - * to count how many bits we've consumed. - */ -- args->bits[0] = HUF_initDStream(args->ip[0]); -- args->bits[1] = HUF_initDStream(args->ip[1]); -- args->bits[2] = HUF_initDStream(args->ip[2]); -- args->bits[3] = HUF_initDStream(args->ip[3]); -- -- /* If ip[] >= ilimit, it is guaranteed to be safe to -- * reload bits[]. It may be beyond its section, but is -- * guaranteed to be valid (>= istart). 
-- */ -- args->ilimit = ilimit; -+ args->bits[0] = HUF_initFastDStream(args->ip[0]); -+ args->bits[1] = HUF_initFastDStream(args->ip[1]); -+ args->bits[2] = HUF_initFastDStream(args->ip[2]); -+ args->bits[3] = HUF_initFastDStream(args->ip[3]); -+ -+ /* The decoders must be sure to never read beyond ilowest. -+ * This is lower than iend[0], but allowing decoders to read -+ * down to ilowest can allow an extra iteration or two in the -+ * fast loop. -+ */ -+ args->ilowest = istart; - - args->oend = oend; - args->dt = dt; - -- return 0; -+ return 1; - } - --static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd) -+static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArgs const* args, int stream, BYTE* segmentEnd) - { - /* Validate that we haven't overwritten. */ - if (args->op[stream] > segmentEnd) -@@ -258,15 +289,33 @@ static size_t HUF_initRemainingDStream(B - return ERROR(corruption_detected); - - /* Construct the BIT_DStream_t. */ -- bit->bitContainer = MEM_readLE64(args->ip[stream]); -- bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]); -- bit->start = (const char*)args->iend[0]; -+ assert(sizeof(size_t) == 8); -+ bit->bitContainer = MEM_readLEST(args->ip[stream]); -+ bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]); -+ bit->start = (const char*)args->ilowest; - bit->limitPtr = bit->start + sizeof(size_t); - bit->ptr = (const char*)args->ip[stream]; - - return 0; - } --#endif -+ -+/* Calls X(N) for each stream 0, 1, 2, 3. */ -+#define HUF_4X_FOR_EACH_STREAM(X) \ -+ do { \ -+ X(0); \ -+ X(1); \ -+ X(2); \ -+ X(3); \ -+ } while (0) -+ -+/* Calls X(N, var) for each stream 0, 1, 2, 3. */ -+#define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var) \ -+ do { \ -+ X(0, (var)); \ -+ X(1, (var)); \ -+ X(2, (var)); \ -+ X(3, (var)); \ -+ } while (0) - - - #ifndef HUF_FORCE_DECOMPRESS_X2 -@@ -283,10 +332,11 @@ typedef struct { BYTE nbBits; BYTE byte; - static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { - U64 D4; - if (MEM_isLittleEndian()) { -- D4 = (symbol << 8) + nbBits; -+ D4 = (U64)((symbol << 8) + nbBits); - } else { -- D4 = symbol + (nbBits << 8); -+ D4 = (U64)(symbol + (nbBits << 8)); - } -+ assert(D4 < (1U << 16)); - D4 *= 0x0001000100010001ULL; - return D4; - } -@@ -329,13 +379,7 @@ typedef struct { - BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; - } HUF_ReadDTableX1_Workspace; - -- --size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) --{ -- return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); --} -- --size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) -+size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags) - { - U32 tableLog = 0; - U32 nbSymbols = 0; -@@ -350,7 +394,7 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DT - DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); - /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... 
*/ - -- iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); -+ iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), flags); - if (HUF_isError(iSize)) return iSize; - - -@@ -377,9 +421,8 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DT - * rankStart[0] is not filled because there are no entries in the table for - * weight 0. - */ -- { -- int n; -- int nextRankStart = 0; -+ { int n; -+ U32 nextRankStart = 0; - int const unroll = 4; - int const nLimit = (int)nbSymbols - unroll + 1; - for (n=0; n<(int)tableLog+1; n++) { -@@ -406,10 +449,9 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DT - * We can switch based on the length to a different inner loop which is - * optimized for that particular case. - */ -- { -- U32 w; -- int symbol=wksp->rankVal[0]; -- int rankStart=0; -+ { U32 w; -+ int symbol = wksp->rankVal[0]; -+ int rankStart = 0; - for (w=1; wrankVal[w]; - int const length = (1 << w) >> 1; -@@ -483,15 +525,19 @@ HUF_decodeSymbolX1(BIT_DStream_t* Dstrea - } - - #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ -- *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) -+ do { *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog); } while (0) - --#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ -- if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ -- HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) -- --#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ -- if (MEM_64bits()) \ -- HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) -+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ -+ do { \ -+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ -+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \ -+ } while (0) -+ -+#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ -+ do { \ -+ if (MEM_64bits()) \ -+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \ -+ } while (0) - - HINT_INLINE size_t - HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) -@@ -519,7 +565,7 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_ - while (p < pEnd) - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); - -- return pEnd-pStart; -+ return (size_t)(pEnd-pStart); - } - - FORCE_INLINE_TEMPLATE size_t -@@ -529,7 +575,7 @@ HUF_decompress1X1_usingDTable_internal_b - const HUF_DTable* DTable) - { - BYTE* op = (BYTE*)dst; -- BYTE* const oend = op + dstSize; -+ BYTE* const oend = ZSTD_maybeNullPtrAdd(op, dstSize); - const void* dtPtr = DTable + 1; - const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; - BIT_DStream_t bitD; -@@ -545,6 +591,10 @@ HUF_decompress1X1_usingDTable_internal_b - return dstSize; - } - -+/* HUF_decompress4X1_usingDTable_internal_body(): -+ * Conditions : -+ * @dstSize >= 6 -+ */ - FORCE_INLINE_TEMPLATE size_t - HUF_decompress4X1_usingDTable_internal_body( - void* dst, size_t dstSize, -@@ -553,6 +603,7 @@ HUF_decompress4X1_usingDTable_internal_b - { - /* Check */ - if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ -+ if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */ - - { const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; -@@ -588,6 +639,7 @@ HUF_decompress4X1_usingDTable_internal_b - - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ -+ assert(dstSize >= 6); /* validated above */ - 
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); - CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); - CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); -@@ -650,52 +702,173 @@ size_t HUF_decompress4X1_usingDTable_int - } - #endif - --#if HUF_NEED_DEFAULT_FUNCTION - static - size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc, - size_t cSrcSize, HUF_DTable const* DTable) { - return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); - } --#endif - - #if ZSTD_ENABLE_ASM_X86_64_BMI2 - --HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN; -+HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN; -+ -+#endif -+ -+static HUF_FAST_BMI2_ATTRS -+void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args) -+{ -+ U64 bits[4]; -+ BYTE const* ip[4]; -+ BYTE* op[4]; -+ U16 const* const dtable = (U16 const*)args->dt; -+ BYTE* const oend = args->oend; -+ BYTE const* const ilowest = args->ilowest; -+ -+ /* Copy the arguments to local variables */ -+ ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); -+ ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip)); -+ ZSTD_memcpy(&op, &args->op, sizeof(op)); -+ -+ assert(MEM_isLittleEndian()); -+ assert(!MEM_32bits()); -+ -+ for (;;) { -+ BYTE* olimit; -+ int stream; -+ -+ /* Assert loop preconditions */ -+#ifndef NDEBUG -+ for (stream = 0; stream < 4; ++stream) { -+ assert(op[stream] <= (stream == 3 ? oend : op[stream + 1])); -+ assert(ip[stream] >= ilowest); -+ } -+#endif -+ /* Compute olimit */ -+ { -+ /* Each iteration produces 5 output symbols per stream */ -+ size_t const oiters = (size_t)(oend - op[3]) / 5; -+ /* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes -+ * per stream. -+ */ -+ size_t const iiters = (size_t)(ip[0] - ilowest) / 7; -+ /* We can safely run iters iterations before running bounds checks */ -+ size_t const iters = MIN(oiters, iiters); -+ size_t const symbols = iters * 5; -+ -+ /* We can simply check that op[3] < olimit, instead of checking all -+ * of our bounds, since we can't hit the other bounds until we've run -+ * iters iterations, which only happens when op[3] == olimit. -+ */ -+ olimit = op[3] + symbols; -+ -+ /* Exit fast decoding loop once we reach the end. */ -+ if (op[3] == olimit) -+ break; -+ -+ /* Exit the decoding loop if any input pointer has crossed the -+ * previous one. This indicates corruption, and a precondition -+ * to our loop is that ip[i] >= ip[0]. 
-+ */ -+ for (stream = 1; stream < 4; ++stream) { -+ if (ip[stream] < ip[stream - 1]) -+ goto _out; -+ } -+ } -+ -+#ifndef NDEBUG -+ for (stream = 1; stream < 4; ++stream) { -+ assert(ip[stream] >= ip[stream - 1]); -+ } -+#endif -+ -+#define HUF_4X1_DECODE_SYMBOL(_stream, _symbol) \ -+ do { \ -+ int const index = (int)(bits[(_stream)] >> 53); \ -+ int const entry = (int)dtable[index]; \ -+ bits[(_stream)] <<= (entry & 0x3F); \ -+ op[(_stream)][(_symbol)] = (BYTE)((entry >> 8) & 0xFF); \ -+ } while (0) -+ -+#define HUF_4X1_RELOAD_STREAM(_stream) \ -+ do { \ -+ int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \ -+ int const nbBits = ctz & 7; \ -+ int const nbBytes = ctz >> 3; \ -+ op[(_stream)] += 5; \ -+ ip[(_stream)] -= nbBytes; \ -+ bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \ -+ bits[(_stream)] <<= nbBits; \ -+ } while (0) -+ -+ /* Manually unroll the loop because compilers don't consistently -+ * unroll the inner loops, which destroys performance. -+ */ -+ do { -+ /* Decode 5 symbols in each of the 4 streams */ -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0); -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1); -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2); -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3); -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4); -+ -+ /* Reload each of the 4 the bitstreams */ -+ HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM); -+ } while (op[3] < olimit); -+ -+#undef HUF_4X1_DECODE_SYMBOL -+#undef HUF_4X1_RELOAD_STREAM -+ } -+ -+_out: -+ -+ /* Save the final values of each of the state variables back to args. */ -+ ZSTD_memcpy(&args->bits, &bits, sizeof(bits)); -+ ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip)); -+ ZSTD_memcpy(&args->op, &op, sizeof(op)); -+} - --static HUF_ASM_X86_64_BMI2_ATTRS -+/* -+ * @returns @p dstSize on success (>= 6) -+ * 0 if the fallback implementation should be used -+ * An error if an error occurred -+ */ -+static HUF_FAST_BMI2_ATTRS - size_t --HUF_decompress4X1_usingDTable_internal_bmi2_asm( -+HUF_decompress4X1_usingDTable_internal_fast( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, -- const HUF_DTable* DTable) -+ const HUF_DTable* DTable, -+ HUF_DecompressFastLoopFn loopFn) - { - void const* dt = DTable + 1; -- const BYTE* const iend = (const BYTE*)cSrc + 6; -- BYTE* const oend = (BYTE*)dst + dstSize; -- HUF_DecompressAsmArgs args; -- { -- size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); -- FORWARD_IF_ERROR(ret, "Failed to init asm args"); -- if (ret != 0) -- return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); -+ BYTE const* const ilowest = (BYTE const*)cSrc; -+ BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize); -+ HUF_DecompressFastArgs args; -+ { size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); -+ FORWARD_IF_ERROR(ret, "Failed to init fast loop args"); -+ if (ret == 0) -+ return 0; - } - -- assert(args.ip[0] >= args.ilimit); -- HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args); -+ assert(args.ip[0] >= args.ilowest); -+ loopFn(&args); - -- /* Our loop guarantees that ip[] >= ilimit and that we haven't -+ /* Our loop guarantees that ip[] >= ilowest and that we haven't - * overwritten any op[]. 
- */ -- assert(args.ip[0] >= iend); -- assert(args.ip[1] >= iend); -- assert(args.ip[2] >= iend); -- assert(args.ip[3] >= iend); -+ assert(args.ip[0] >= ilowest); -+ assert(args.ip[0] >= ilowest); -+ assert(args.ip[1] >= ilowest); -+ assert(args.ip[2] >= ilowest); -+ assert(args.ip[3] >= ilowest); - assert(args.op[3] <= oend); -- (void)iend; -+ -+ assert(ilowest == args.ilowest); -+ assert(ilowest + 6 == args.iend[0]); -+ (void)ilowest; - - /* finish bit streams one by one. */ -- { -- size_t const segmentSize = (dstSize+3) / 4; -+ { size_t const segmentSize = (dstSize+3) / 4; - BYTE* segmentEnd = (BYTE*)dst; - int i; - for (i = 0; i < 4; ++i) { -@@ -712,97 +885,59 @@ HUF_decompress4X1_usingDTable_internal_b - } - - /* decoded size */ -+ assert(dstSize != 0); - return dstSize; - } --#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */ -- --typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, -- const void *cSrc, -- size_t cSrcSize, -- const HUF_DTable *DTable); - - HUF_DGEN(HUF_decompress1X1_usingDTable_internal) - - static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc, -- size_t cSrcSize, HUF_DTable const* DTable, int bmi2) -+ size_t cSrcSize, HUF_DTable const* DTable, int flags) - { -+ HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X1_usingDTable_internal_default; -+ HUF_DecompressFastLoopFn loopFn = HUF_decompress4X1_usingDTable_internal_fast_c_loop; -+ - #if DYNAMIC_BMI2 -- if (bmi2) { -+ if (flags & HUF_flags_bmi2) { -+ fallbackFn = HUF_decompress4X1_usingDTable_internal_bmi2; - # if ZSTD_ENABLE_ASM_X86_64_BMI2 -- return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); --# else -- return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); -+ if (!(flags & HUF_flags_disableAsm)) { -+ loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop; -+ } - # endif -+ } else { -+ return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable); - } --#else -- (void)bmi2; - #endif - - #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__) -- return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); --#else -- return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable); -+ if (!(flags & HUF_flags_disableAsm)) { -+ loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop; -+ } - #endif --} -- -- --size_t HUF_decompress1X1_usingDTable( -- void* dst, size_t dstSize, -- const void* cSrc, size_t cSrcSize, -- const HUF_DTable* DTable) --{ -- DTableDesc dtd = HUF_getDTableDesc(DTable); -- if (dtd.tableType != 0) return ERROR(GENERIC); -- return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); --} - --size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, -- const void* cSrc, size_t cSrcSize, -- void* workSpace, size_t wkspSize) --{ -- const BYTE* ip = (const BYTE*) cSrc; -- -- size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); -- if (HUF_isError(hSize)) return hSize; -- if (hSize >= cSrcSize) return ERROR(srcSize_wrong); -- ip += hSize; cSrcSize -= hSize; -- -- return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); --} -- -- --size_t HUF_decompress4X1_usingDTable( -- void* dst, size_t dstSize, -- const void* cSrc, size_t cSrcSize, -- const HUF_DTable* DTable) --{ -- DTableDesc dtd = HUF_getDTableDesc(DTable); -- if (dtd.tableType != 0) return ERROR(GENERIC); -- return 
HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -+ if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) { -+ size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn); -+ if (ret != 0) -+ return ret; -+ } -+ return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable); - } - --static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, -+static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, -- void* workSpace, size_t wkspSize, int bmi2) -+ void* workSpace, size_t wkspSize, int flags) - { - const BYTE* ip = (const BYTE*) cSrc; - -- size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -+ size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - -- return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); -+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); - } - --size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, -- const void* cSrc, size_t cSrcSize, -- void* workSpace, size_t wkspSize) --{ -- return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); --} -- -- - #endif /* HUF_FORCE_DECOMPRESS_X2 */ - - -@@ -985,7 +1120,7 @@ static void HUF_fillDTableX2Level2(HUF_D - - static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, - const sortedSymbol_t* sortedList, -- const U32* rankStart, rankValCol_t *rankValOrigin, const U32 maxWeight, -+ const U32* rankStart, rankValCol_t* rankValOrigin, const U32 maxWeight, - const U32 nbBitsBaseline) - { - U32* const rankVal = rankValOrigin[0]; -@@ -1040,14 +1175,7 @@ typedef struct { - - size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, - const void* src, size_t srcSize, -- void* workSpace, size_t wkspSize) --{ -- return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); --} -- --size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, -- const void* src, size_t srcSize, -- void* workSpace, size_t wkspSize, int bmi2) -+ void* workSpace, size_t wkspSize, int flags) - { - U32 tableLog, maxW, nbSymbols; - DTableDesc dtd = HUF_getDTableDesc(DTable); -@@ -1069,7 +1197,7 @@ size_t HUF_readDTableX2_wksp_bmi2(HUF_DT - if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); - /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... 
*/ - -- iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2); -+ iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), flags); - if (HUF_isError(iSize)) return iSize; - - /* check result */ -@@ -1159,15 +1287,19 @@ HUF_decodeLastSymbolX2(void* op, BIT_DSt - } - - #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ -- ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) -+ do { ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); } while (0) - --#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ -- if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ -- ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) -- --#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ -- if (MEM_64bits()) \ -- ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) -+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ -+ do { \ -+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ -+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \ -+ } while (0) -+ -+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ -+ do { \ -+ if (MEM_64bits()) \ -+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \ -+ } while (0) - - HINT_INLINE size_t - HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, -@@ -1227,7 +1359,7 @@ HUF_decompress1X2_usingDTable_internal_b - - /* decode */ - { BYTE* const ostart = (BYTE*) dst; -- BYTE* const oend = ostart + dstSize; -+ BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, dstSize); - const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ - const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; - DTableDesc const dtd = HUF_getDTableDesc(DTable); -@@ -1240,6 +1372,11 @@ HUF_decompress1X2_usingDTable_internal_b - /* decoded size */ - return dstSize; - } -+ -+/* HUF_decompress4X2_usingDTable_internal_body(): -+ * Conditions: -+ * @dstSize >= 6 -+ */ - FORCE_INLINE_TEMPLATE size_t - HUF_decompress4X2_usingDTable_internal_body( - void* dst, size_t dstSize, -@@ -1247,6 +1384,7 @@ HUF_decompress4X2_usingDTable_internal_b - const HUF_DTable* DTable) - { - if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ -+ if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */ - - { const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; -@@ -1280,8 +1418,9 @@ HUF_decompress4X2_usingDTable_internal_b - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - -- if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ -- if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ -+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ -+ if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ -+ assert(dstSize >= 6 /* validated above */); - CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); - CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); - CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); -@@ -1366,44 +1505,191 @@ size_t HUF_decompress4X2_usingDTable_int - } - #endif - --#if HUF_NEED_DEFAULT_FUNCTION - static - size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc, - size_t cSrcSize, HUF_DTable const* DTable) { - return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, 
DTable); - } --#endif - - #if ZSTD_ENABLE_ASM_X86_64_BMI2 - --HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN; -+HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN; -+ -+#endif -+ -+static HUF_FAST_BMI2_ATTRS -+void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args) -+{ -+ U64 bits[4]; -+ BYTE const* ip[4]; -+ BYTE* op[4]; -+ BYTE* oend[4]; -+ HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt; -+ BYTE const* const ilowest = args->ilowest; -+ -+ /* Copy the arguments to local registers. */ -+ ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); -+ ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip)); -+ ZSTD_memcpy(&op, &args->op, sizeof(op)); -+ -+ oend[0] = op[1]; -+ oend[1] = op[2]; -+ oend[2] = op[3]; -+ oend[3] = args->oend; -+ -+ assert(MEM_isLittleEndian()); -+ assert(!MEM_32bits()); -+ -+ for (;;) { -+ BYTE* olimit; -+ int stream; -+ -+ /* Assert loop preconditions */ -+#ifndef NDEBUG -+ for (stream = 0; stream < 4; ++stream) { -+ assert(op[stream] <= oend[stream]); -+ assert(ip[stream] >= ilowest); -+ } -+#endif -+ /* Compute olimit */ -+ { -+ /* Each loop does 5 table lookups for each of the 4 streams. -+ * Each table lookup consumes up to 11 bits of input, and produces -+ * up to 2 bytes of output. -+ */ -+ /* We can consume up to 7 bytes of input per iteration per stream. -+ * We also know that each input pointer is >= ip[0]. So we can run -+ * iters loops before running out of input. -+ */ -+ size_t iters = (size_t)(ip[0] - ilowest) / 7; -+ /* Each iteration can produce up to 10 bytes of output per stream. -+ * Each output stream my advance at different rates. So take the -+ * minimum number of safe iterations among all the output streams. -+ */ -+ for (stream = 0; stream < 4; ++stream) { -+ size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10; -+ iters = MIN(iters, oiters); -+ } -+ -+ /* Each iteration produces at least 5 output symbols. So until -+ * op[3] crosses olimit, we know we haven't executed iters -+ * iterations yet. This saves us maintaining an iters counter, -+ * at the expense of computing the remaining # of iterations -+ * more frequently. -+ */ -+ olimit = op[3] + (iters * 5); -+ -+ /* Exit the fast decoding loop once we reach the end. */ -+ if (op[3] == olimit) -+ break; -+ -+ /* Exit the decoding loop if any input pointer has crossed the -+ * previous one. This indicates corruption, and a precondition -+ * to our loop is that ip[i] >= ip[0]. 
-+ */ -+ for (stream = 1; stream < 4; ++stream) { -+ if (ip[stream] < ip[stream - 1]) -+ goto _out; -+ } -+ } -+ -+#ifndef NDEBUG -+ for (stream = 1; stream < 4; ++stream) { -+ assert(ip[stream] >= ip[stream - 1]); -+ } -+#endif - --static HUF_ASM_X86_64_BMI2_ATTRS size_t --HUF_decompress4X2_usingDTable_internal_bmi2_asm( -+#define HUF_4X2_DECODE_SYMBOL(_stream, _decode3) \ -+ do { \ -+ if ((_decode3) || (_stream) != 3) { \ -+ int const index = (int)(bits[(_stream)] >> 53); \ -+ HUF_DEltX2 const entry = dtable[index]; \ -+ MEM_write16(op[(_stream)], entry.sequence); \ -+ bits[(_stream)] <<= (entry.nbBits) & 0x3F; \ -+ op[(_stream)] += (entry.length); \ -+ } \ -+ } while (0) -+ -+#define HUF_4X2_RELOAD_STREAM(_stream) \ -+ do { \ -+ HUF_4X2_DECODE_SYMBOL(3, 1); \ -+ { \ -+ int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \ -+ int const nbBits = ctz & 7; \ -+ int const nbBytes = ctz >> 3; \ -+ ip[(_stream)] -= nbBytes; \ -+ bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \ -+ bits[(_stream)] <<= nbBits; \ -+ } \ -+ } while (0) -+ -+ /* Manually unroll the loop because compilers don't consistently -+ * unroll the inner loops, which destroys performance. -+ */ -+ do { -+ /* Decode 5 symbols from each of the first 3 streams. -+ * The final stream will be decoded during the reload phase -+ * to reduce register pressure. -+ */ -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); -+ -+ /* Decode one symbol from the final stream */ -+ HUF_4X2_DECODE_SYMBOL(3, 1); -+ -+ /* Decode 4 symbols from the final stream & reload bitstreams. -+ * The final stream is reloaded last, meaning that all 5 symbols -+ * are decoded from the final stream before it is reloaded. -+ */ -+ HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM); -+ } while (op[3] < olimit); -+ } -+ -+#undef HUF_4X2_DECODE_SYMBOL -+#undef HUF_4X2_RELOAD_STREAM -+ -+_out: -+ -+ /* Save the final values of each of the state variables back to args. 
*/ -+ ZSTD_memcpy(&args->bits, &bits, sizeof(bits)); -+ ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip)); -+ ZSTD_memcpy(&args->op, &op, sizeof(op)); -+} -+ -+ -+static HUF_FAST_BMI2_ATTRS size_t -+HUF_decompress4X2_usingDTable_internal_fast( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, -- const HUF_DTable* DTable) { -+ const HUF_DTable* DTable, -+ HUF_DecompressFastLoopFn loopFn) { - void const* dt = DTable + 1; -- const BYTE* const iend = (const BYTE*)cSrc + 6; -- BYTE* const oend = (BYTE*)dst + dstSize; -- HUF_DecompressAsmArgs args; -+ const BYTE* const ilowest = (const BYTE*)cSrc; -+ BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize); -+ HUF_DecompressFastArgs args; - { -- size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); -+ size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); - FORWARD_IF_ERROR(ret, "Failed to init asm args"); -- if (ret != 0) -- return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); -+ if (ret == 0) -+ return 0; - } - -- assert(args.ip[0] >= args.ilimit); -- HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args); -+ assert(args.ip[0] >= args.ilowest); -+ loopFn(&args); - - /* note : op4 already verified within main loop */ -- assert(args.ip[0] >= iend); -- assert(args.ip[1] >= iend); -- assert(args.ip[2] >= iend); -- assert(args.ip[3] >= iend); -+ assert(args.ip[0] >= ilowest); -+ assert(args.ip[1] >= ilowest); -+ assert(args.ip[2] >= ilowest); -+ assert(args.ip[3] >= ilowest); - assert(args.op[3] <= oend); -- (void)iend; -+ -+ assert(ilowest == args.ilowest); -+ assert(ilowest + 6 == args.iend[0]); -+ (void)ilowest; - - /* finish bitStreams one by one */ - { -@@ -1426,91 +1712,72 @@ HUF_decompress4X2_usingDTable_internal_b - /* decoded size */ - return dstSize; - } --#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */ - - static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc, -- size_t cSrcSize, HUF_DTable const* DTable, int bmi2) -+ size_t cSrcSize, HUF_DTable const* DTable, int flags) - { -+ HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X2_usingDTable_internal_default; -+ HUF_DecompressFastLoopFn loopFn = HUF_decompress4X2_usingDTable_internal_fast_c_loop; -+ - #if DYNAMIC_BMI2 -- if (bmi2) { -+ if (flags & HUF_flags_bmi2) { -+ fallbackFn = HUF_decompress4X2_usingDTable_internal_bmi2; - # if ZSTD_ENABLE_ASM_X86_64_BMI2 -- return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); --# else -- return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); -+ if (!(flags & HUF_flags_disableAsm)) { -+ loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop; -+ } - # endif -+ } else { -+ return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable); - } --#else -- (void)bmi2; - #endif - - #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__) -- return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); --#else -- return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable); -+ if (!(flags & HUF_flags_disableAsm)) { -+ loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop; -+ } - #endif -+ -+ if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) { -+ size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn); -+ if (ret != 0) -+ return ret; -+ } -+ return fallbackFn(dst, dstSize, cSrc, 
cSrcSize, DTable); - } - - HUF_DGEN(HUF_decompress1X2_usingDTable_internal) - --size_t HUF_decompress1X2_usingDTable( -- void* dst, size_t dstSize, -- const void* cSrc, size_t cSrcSize, -- const HUF_DTable* DTable) --{ -- DTableDesc dtd = HUF_getDTableDesc(DTable); -- if (dtd.tableType != 1) return ERROR(GENERIC); -- return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); --} -- - size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, -- void* workSpace, size_t wkspSize) -+ void* workSpace, size_t wkspSize, int flags) - { - const BYTE* ip = (const BYTE*) cSrc; - - size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, -- workSpace, wkspSize); -+ workSpace, wkspSize, flags); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - -- return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); --} -- -- --size_t HUF_decompress4X2_usingDTable( -- void* dst, size_t dstSize, -- const void* cSrc, size_t cSrcSize, -- const HUF_DTable* DTable) --{ -- DTableDesc dtd = HUF_getDTableDesc(DTable); -- if (dtd.tableType != 1) return ERROR(GENERIC); -- return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, flags); - } - --static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, -+static size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, -- void* workSpace, size_t wkspSize, int bmi2) -+ void* workSpace, size_t wkspSize, int flags) - { - const BYTE* ip = (const BYTE*) cSrc; - - size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, -- workSpace, wkspSize); -+ workSpace, wkspSize, flags); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - -- return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); -+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); - } - --size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, -- const void* cSrc, size_t cSrcSize, -- void* workSpace, size_t wkspSize) --{ -- return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); --} -- -- - #endif /* HUF_FORCE_DECOMPRESS_X1 */ - - -@@ -1518,44 +1785,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_D - /* Universal decompression selectors */ - /* ***********************************/ - --size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, -- const void* cSrc, size_t cSrcSize, -- const HUF_DTable* DTable) --{ -- DTableDesc const dtd = HUF_getDTableDesc(DTable); --#if defined(HUF_FORCE_DECOMPRESS_X1) -- (void)dtd; -- assert(dtd.tableType == 0); -- return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); --#elif defined(HUF_FORCE_DECOMPRESS_X2) -- (void)dtd; -- assert(dtd.tableType == 1); -- return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); --#else -- return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : -- HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); --#endif --} -- --size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, -- const void* cSrc, size_t cSrcSize, -- const HUF_DTable* DTable) --{ -- DTableDesc const dtd = HUF_getDTableDesc(DTable); --#if defined(HUF_FORCE_DECOMPRESS_X1) -- (void)dtd; -- assert(dtd.tableType == 0); -- return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); --#elif defined(HUF_FORCE_DECOMPRESS_X2) -- (void)dtd; -- assert(dtd.tableType == 1); -- return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); --#else -- return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : -- HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); --#endif --} -- - - #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) - typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; -@@ -1610,36 +1839,9 @@ U32 HUF_selectDecoder (size_t dstSize, s - #endif - } - -- --size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, -- size_t dstSize, const void* cSrc, -- size_t cSrcSize, void* workSpace, -- size_t wkspSize) --{ -- /* validation checks */ -- if (dstSize == 0) return ERROR(dstSize_tooSmall); -- if (cSrcSize == 0) return ERROR(corruption_detected); -- -- { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); --#if defined(HUF_FORCE_DECOMPRESS_X1) -- (void)algoNb; -- assert(algoNb == 0); -- return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); --#elif defined(HUF_FORCE_DECOMPRESS_X2) -- (void)algoNb; -- assert(algoNb == 1); -- return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); --#else -- return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, -- cSrcSize, workSpace, wkspSize): -- HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); --#endif -- } --} -- - size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, -- void* workSpace, size_t wkspSize) -+ void* workSpace, size_t wkspSize, int flags) - { - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); -@@ -1652,71 +1854,71 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DT - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, -- cSrcSize, workSpace, wkspSize); -+ cSrcSize, workSpace, wkspSize, flags); - #elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, -- cSrcSize, workSpace, wkspSize); -+ cSrcSize, workSpace, wkspSize, flags); - #else - return algoNb ? 
HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, -- cSrcSize, workSpace, wkspSize): -+ cSrcSize, workSpace, wkspSize, flags): - HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, -- cSrcSize, workSpace, wkspSize); -+ cSrcSize, workSpace, wkspSize, flags); - #endif - } - } - - --size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) -+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags) - { - DTableDesc const dtd = HUF_getDTableDesc(DTable); - #if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); -- return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); - #elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); -- return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); - #else -- return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : -- HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) : -+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); - #endif - } - - #ifndef HUF_FORCE_DECOMPRESS_X2 --size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) -+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags) - { - const BYTE* ip = (const BYTE*) cSrc; - -- size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -+ size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - -- return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); -+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); - } - #endif - --size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) -+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags) - { - DTableDesc const dtd = HUF_getDTableDesc(DTable); - #if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); -- return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -+ return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); - #elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); -- return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -+ return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); - #else -- return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : -- HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) : -+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); - #endif - } - --size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) -+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags) - { - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); -@@ -1726,15 +1928,14 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi - #if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); -- return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags); - #elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); -- return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags); - #else -- return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : -- HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -+ return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags) : -+ HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags); - #endif - } - } -- ---- a/lib/zstd/decompress/zstd_ddict.c -+++ b/lib/zstd/decompress/zstd_ddict.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -14,12 +15,12 @@ - /*-******************************************************* - * Dependencies - *********************************************************/ -+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */ - #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ - #include "../common/cpu.h" /* bmi2 */ - #include "../common/mem.h" /* low level memory routines */ - #define FSE_STATIC_LINKING_ONLY - #include "../common/fse.h" --#define HUF_STATIC_LINKING_ONLY - #include "../common/huf.h" - #include "zstd_decompress_internal.h" - #include "zstd_ddict.h" -@@ -131,7 +132,7 @@ static size_t ZSTD_initDDict_internal(ZS - ZSTD_memcpy(internalBuffer, dict, dictSize); - } - ddict->dictSize = dictSize; -- ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ -+ ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */ - - /* parse dictionary content */ - FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); -@@ -237,5 +238,5 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDic - unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) - { - if (ddict==NULL) return 0; -- return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); -+ return ddict->dictID; - } ---- a/lib/zstd/decompress/zstd_ddict.h -+++ b/lib/zstd/decompress/zstd_ddict.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the ---- a/lib/zstd/decompress/zstd_decompress.c -+++ b/lib/zstd/decompress/zstd_decompress.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -53,13 +54,15 @@ - * Dependencies - *********************************************************/ - #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ -+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */ -+#include "../common/error_private.h" -+#include "../common/zstd_internal.h" /* blockProperties_t */ - #include "../common/mem.h" /* low level memory routines */ -+#include "../common/bits.h" /* ZSTD_highbit32 */ - #define FSE_STATIC_LINKING_ONLY - #include "../common/fse.h" --#define HUF_STATIC_LINKING_ONLY - #include "../common/huf.h" - #include <linux/xxhash.h> /* xxh64_reset, xxh64_update, xxh64_digest, XXH64 */ --#include "../common/zstd_internal.h" /* blockProperties_t */ - #include "zstd_decompress_internal.h" /* ZSTD_DCtx */ - #include "zstd_ddict.h" /* ZSTD_DDictDictContent */ - #include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */ -@@ -72,11 +75,11 @@ - *************************************/ - - #define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4 --#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. -- * Currently, that means a 0.75 load factor. -- * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded -- * the load factor of the ddict hash set.
-- */ -+#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. -+ * Currently, that means a 0.75 load factor. -+ * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded -+ * the load factor of the ddict hash set. -+ */ - - #define DDICT_HASHSET_TABLE_BASE_SIZE 64 - #define DDICT_HASHSET_RESIZE_FACTOR 2 -@@ -237,6 +240,8 @@ static void ZSTD_DCtx_resetParameters(ZS - dctx->outBufferMode = ZSTD_bm_buffered; - dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; - dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; -+ dctx->disableHufAsm = 0; -+ dctx->maxBlockSizeParam = 0; - } - - static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) -@@ -253,6 +258,7 @@ static void ZSTD_initDCtx_internal(ZSTD_ - dctx->streamStage = zdss_init; - dctx->noForwardProgress = 0; - dctx->oversizedDuration = 0; -+ dctx->isFrameDecompression = 1; - #if DYNAMIC_BMI2 - dctx->bmi2 = ZSTD_cpuSupportsBmi2(); - #endif -@@ -421,16 +427,40 @@ size_t ZSTD_frameHeaderSize(const void* - * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless - * @return : 0, `zfhPtr` is correctly filled, - * >0, `srcSize` is too small, value is wanted `srcSize` amount, -- * or an error code, which can be tested using ZSTD_isError() */ --size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) -+** or an error code, which can be tested using ZSTD_isError() */ -+size_t ZSTD_getFrameHeader_advanced(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) - { - const BYTE* ip = (const BYTE*)src; - size_t const minInputSize = ZSTD_startingInputLength(format); - -- ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ -- if (srcSize < minInputSize) return minInputSize; -- RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); -+ DEBUGLOG(5, "ZSTD_getFrameHeader_advanced: minInputSize = %zu, srcSize = %zu", minInputSize, srcSize); -+ -+ if (srcSize > 0) { -+ /* note : technically could be considered an assert(), since it's an invalid entry */ -+ RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter : src==NULL, but srcSize>0"); -+ } -+ if (srcSize < minInputSize) { -+ if (srcSize > 0 && format != ZSTD_f_zstd1_magicless) { -+ /* when receiving less than @minInputSize bytes, -+ * control these bytes at least correspond to a supported magic number -+ * in order to error out early if they don't. 
-+ **/ -+ size_t const toCopy = MIN(4, srcSize); -+ unsigned char hbuf[4]; MEM_writeLE32(hbuf, ZSTD_MAGICNUMBER); -+ assert(src != NULL); -+ ZSTD_memcpy(hbuf, src, toCopy); -+ if ( MEM_readLE32(hbuf) != ZSTD_MAGICNUMBER ) { -+ /* not a zstd frame : let's check if it's a skippable frame */ -+ MEM_writeLE32(hbuf, ZSTD_MAGIC_SKIPPABLE_START); -+ ZSTD_memcpy(hbuf, src, toCopy); -+ if ((MEM_readLE32(hbuf) & ZSTD_MAGIC_SKIPPABLE_MASK) != ZSTD_MAGIC_SKIPPABLE_START) { -+ RETURN_ERROR(prefix_unknown, -+ "first bytes don't correspond to any supported magic number"); -+ } } } -+ return minInputSize; -+ } - -+ ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzers may not understand that zfhPtr will be read only if return value is zero, since they are 2 different signals */ - if ( (format != ZSTD_f_zstd1_magicless) - && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { - if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { -@@ -438,8 +468,10 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD - if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) - return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ - ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); -- zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); - zfhPtr->frameType = ZSTD_skippableFrame; -+ zfhPtr->dictID = MEM_readLE32(src) - ZSTD_MAGIC_SKIPPABLE_START; -+ zfhPtr->headerSize = ZSTD_SKIPPABLEHEADERSIZE; -+ zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); - return 0; - } - RETURN_ERROR(prefix_unknown, ""); -@@ -508,7 +540,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD - * @return : 0, `zfhPtr` is correctly filled, - * >0, `srcSize` is too small, value is wanted `srcSize` amount, - * or an error code, which can be tested using ZSTD_isError() */ --size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize) -+size_t ZSTD_getFrameHeader(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize) - { - return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1); - } -@@ -520,7 +552,7 @@ size_t ZSTD_getFrameHeader(ZSTD_frameHea - * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */ - unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) - { -- { ZSTD_frameHeader zfh; -+ { ZSTD_FrameHeader zfh; - if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0) - return ZSTD_CONTENTSIZE_ERROR; - if (zfh.frameType == ZSTD_skippableFrame) { -@@ -540,49 +572,52 @@ static size_t readSkippableFrameSize(voi - sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); - RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, - frameParameter_unsupported, ""); -- { -- size_t const skippableSize = skippableHeaderSize + sizeU32; -+ { size_t const skippableSize = skippableHeaderSize + sizeU32; - RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); - return skippableSize; - } - } - - /*! ZSTD_readSkippableFrame() : -- * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer. -+ * Retrieves content of a skippable frame, and writes it to dst buffer. - * - * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written, - * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested - * in the magicVariant. - * -- * Returns an error if destination buffer is not large enough, or if the frame is not skippable. 
-+ * Returns an error if destination buffer is not large enough, or if this is not a valid skippable frame. - * - * @return : number of bytes written or a ZSTD error. - */ --ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant, -- const void* src, size_t srcSize) -+size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, -+ unsigned* magicVariant, /* optional, can be NULL */ -+ const void* src, size_t srcSize) - { -- U32 const magicNumber = MEM_readLE32(src); -- size_t skippableFrameSize = readSkippableFrameSize(src, srcSize); -- size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE; -- -- /* check input validity */ -- RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, ""); -- RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, ""); -- RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, ""); -- -- /* deliver payload */ -- if (skippableContentSize > 0 && dst != NULL) -- ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize); -- if (magicVariant != NULL) -- *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START; -- return skippableContentSize; -+ RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); -+ -+ { U32 const magicNumber = MEM_readLE32(src); -+ size_t skippableFrameSize = readSkippableFrameSize(src, srcSize); -+ size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE; -+ -+ /* check input validity */ -+ RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, ""); -+ RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, ""); -+ RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, ""); -+ -+ /* deliver payload */ -+ if (skippableContentSize > 0 && dst != NULL) -+ ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize); -+ if (magicVariant != NULL) -+ *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START; -+ return skippableContentSize; -+ } - } - - /* ZSTD_findDecompressedSize() : -- * compatible with legacy mode - * `srcSize` must be the exact length of some number of ZSTD compressed and/or - * skippable frames -- * @return : decompressed size of the frames contained */ -+ * note: compatible with legacy mode -+ * @return : decompressed size of the frames contained */ - unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) - { - unsigned long long totalDstSize = 0; -@@ -592,9 +627,7 @@ unsigned long long ZSTD_findDecompressed - - if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { - size_t const skippableSize = readSkippableFrameSize(src, srcSize); -- if (ZSTD_isError(skippableSize)) { -- return ZSTD_CONTENTSIZE_ERROR; -- } -+ if (ZSTD_isError(skippableSize)) return ZSTD_CONTENTSIZE_ERROR; - assert(skippableSize <= srcSize); - - src = (const BYTE *)src + skippableSize; -@@ -602,17 +635,17 @@ unsigned long long ZSTD_findDecompressed - continue; - } - -- { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); -- if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; -+ { unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize); -+ if (fcs >= ZSTD_CONTENTSIZE_ERROR) return fcs; - -- /* check for overflow */ -- if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; -- totalDstSize += ret; -+ if (totalDstSize + fcs < totalDstSize) 
-+ return ZSTD_CONTENTSIZE_ERROR; /* check for overflow */ -+ totalDstSize += fcs; - } -+ /* skip to next frame */ - { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); -- if (ZSTD_isError(frameSrcSize)) { -- return ZSTD_CONTENTSIZE_ERROR; -- } -+ if (ZSTD_isError(frameSrcSize)) return ZSTD_CONTENTSIZE_ERROR; -+ assert(frameSrcSize <= srcSize); - - src = (const BYTE *)src + frameSrcSize; - srcSize -= frameSrcSize; -@@ -676,13 +709,13 @@ static ZSTD_frameSizeInfo ZSTD_errorFram - return frameSizeInfo; - } - --static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) -+static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize, ZSTD_format_e format) - { - ZSTD_frameSizeInfo frameSizeInfo; - ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); - - -- if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) -+ if (format == ZSTD_f_zstd1 && (srcSize >= ZSTD_SKIPPABLEHEADERSIZE) - && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { - frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); - assert(ZSTD_isError(frameSizeInfo.compressedSize) || -@@ -693,10 +726,10 @@ static ZSTD_frameSizeInfo ZSTD_findFrame - const BYTE* const ipstart = ip; - size_t remainingSize = srcSize; - size_t nbBlocks = 0; -- ZSTD_frameHeader zfh; -+ ZSTD_FrameHeader zfh; - - /* Extract Frame Header */ -- { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); -+ { size_t const ret = ZSTD_getFrameHeader_advanced(&zfh, src, srcSize, format); - if (ZSTD_isError(ret)) - return ZSTD_errorFrameSizeInfo(ret); - if (ret > 0) -@@ -730,28 +763,31 @@ static ZSTD_frameSizeInfo ZSTD_findFrame - ip += 4; - } - -+ frameSizeInfo.nbBlocks = nbBlocks; - frameSizeInfo.compressedSize = (size_t)(ip - ipstart); - frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) - ? 
zfh.frameContentSize -- : nbBlocks * zfh.blockSizeMax; -+ : (unsigned long long)nbBlocks * zfh.blockSizeMax; - return frameSizeInfo; - } - } - -+static size_t ZSTD_findFrameCompressedSize_advanced(const void *src, size_t srcSize, ZSTD_format_e format) { -+ ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, format); -+ return frameSizeInfo.compressedSize; -+} -+ - /* ZSTD_findFrameCompressedSize() : -- * compatible with legacy mode -- * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame -- * `srcSize` must be at least as large as the frame contained -- * @return : the compressed size of the frame starting at `src` */ -+ * See docs in zstd.h -+ * Note: compatible with legacy mode */ - size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) - { -- ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); -- return frameSizeInfo.compressedSize; -+ return ZSTD_findFrameCompressedSize_advanced(src, srcSize, ZSTD_f_zstd1); - } - - /* ZSTD_decompressBound() : - * compatible with legacy mode -- * `src` must point to the start of a ZSTD frame or a skippeable frame -+ * `src` must point to the start of a ZSTD frame or a skippable frame - * `srcSize` must be at least as large as the frame contained - * @return : the maximum decompressed size of the compressed source - */ -@@ -760,7 +796,7 @@ unsigned long long ZSTD_decompressBound( - unsigned long long bound = 0; - /* Iterate over each frame */ - while (srcSize > 0) { -- ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); -+ ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1); - size_t const compressedSize = frameSizeInfo.compressedSize; - unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; - if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) -@@ -773,6 +809,48 @@ unsigned long long ZSTD_decompressBound( - return bound; - } - -+size_t ZSTD_decompressionMargin(void const* src, size_t srcSize) -+{ -+ size_t margin = 0; -+ unsigned maxBlockSize = 0; -+ -+ /* Iterate over each frame */ -+ while (srcSize > 0) { -+ ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1); -+ size_t const compressedSize = frameSizeInfo.compressedSize; -+ unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; -+ ZSTD_FrameHeader zfh; -+ -+ FORWARD_IF_ERROR(ZSTD_getFrameHeader(&zfh, src, srcSize), ""); -+ if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) -+ return ERROR(corruption_detected); -+ -+ if (zfh.frameType == ZSTD_frame) { -+ /* Add the frame header to our margin */ -+ margin += zfh.headerSize; -+ /* Add the checksum to our margin */ -+ margin += zfh.checksumFlag ? 4 : 0; -+ /* Add 3 bytes per block */ -+ margin += 3 * frameSizeInfo.nbBlocks; -+ -+ /* Compute the max block size */ -+ maxBlockSize = MAX(maxBlockSize, zfh.blockSizeMax); -+ } else { -+ assert(zfh.frameType == ZSTD_skippableFrame); -+ /* Add the entire skippable frame size to our margin. */ -+ margin += compressedSize; -+ } -+ -+ assert(srcSize >= compressedSize); -+ src = (const BYTE*)src + compressedSize; -+ srcSize -= compressedSize; -+ } -+ -+ /* Add the max block size back to the margin. 
*/ -+ margin += maxBlockSize; -+ -+ return margin; -+} - - /*-************************************************************* - * Frame decoding -@@ -815,7 +893,7 @@ static size_t ZSTD_setRleBlock(void* dst - return regenSize; - } - --static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming) -+static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, int streaming) - { - (void)dctx; - (void)uncompressedSize; -@@ -856,6 +934,10 @@ static size_t ZSTD_decompressFrame(ZSTD_ - ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; - } - -+ /* Shrink the blockSizeMax if enabled */ -+ if (dctx->maxBlockSizeParam != 0) -+ dctx->fParams.blockSizeMax = MIN(dctx->fParams.blockSizeMax, (unsigned)dctx->maxBlockSizeParam); -+ - /* Loop on each block */ - while (1) { - BYTE* oBlockEnd = oend; -@@ -888,7 +970,8 @@ static size_t ZSTD_decompressFrame(ZSTD_ - switch(blockProperties.blockType) - { - case bt_compressed: -- decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, /* frame */ 1, not_streaming); -+ assert(dctx->isFrameDecompression == 1); -+ decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, not_streaming); - break; - case bt_raw : - /* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. */ -@@ -901,12 +984,14 @@ static size_t ZSTD_decompressFrame(ZSTD_ - default: - RETURN_ERROR(corruption_detected, "invalid block type"); - } -- -- if (ZSTD_isError(decodedSize)) return decodedSize; -- if (dctx->validateChecksum) -+ FORWARD_IF_ERROR(decodedSize, "Block decompression failure"); -+ DEBUGLOG(5, "Decompressed block of dSize = %u", (unsigned)decodedSize); -+ if (dctx->validateChecksum) { - xxh64_update(&dctx->xxhState, op, decodedSize); -- if (decodedSize != 0) -+ } -+ if (decodedSize) /* support dst = NULL,0 */ { - op += decodedSize; -+ } - assert(ip != NULL); - ip += cBlockSize; - remainingSrcSize -= cBlockSize; -@@ -930,12 +1015,15 @@ static size_t ZSTD_decompressFrame(ZSTD_ - } - ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0); - /* Allow caller to get size read */ -+ DEBUGLOG(4, "ZSTD_decompressFrame: decompressed frame of size %i, consuming %i bytes of input", (int)(op-ostart), (int)(ip - (const BYTE*)*srcPtr)); - *srcPtr = ip; - *srcSizePtr = remainingSrcSize; - return (size_t)(op-ostart); - } - --static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict, size_t dictSize, -@@ -955,17 +1043,18 @@ static size_t ZSTD_decompressMultiFrame( - while (srcSize >= ZSTD_startingInputLength(dctx->format)) { - - -- { U32 const magicNumber = MEM_readLE32(src); -- DEBUGLOG(4, "reading magic number %08X (expecting %08X)", -- (unsigned)magicNumber, ZSTD_MAGICNUMBER); -+ if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) { -+ U32 const magicNumber = MEM_readLE32(src); -+ DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber); - if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { -+ /* skippable frame detected : skip it */ - size_t const skippableSize = readSkippableFrameSize(src, srcSize); -- FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); -+ FORWARD_IF_ERROR(skippableSize, "invalid skippable frame"); - assert(skippableSize <= 
srcSize); - - src = (const BYTE *)src + skippableSize; - srcSize -= skippableSize; -- continue; -+ continue; /* check next frame */ - } } - - if (ddict) { -@@ -1061,8 +1150,8 @@ size_t ZSTD_decompress(void* dst, size_t - size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } - - /* -- * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, -- * we allow taking a partial block as the input. Currently only raw uncompressed blocks can -+ * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, we -+ * allow taking a partial block as the input. Currently only raw uncompressed blocks can - * be streamed. - * - * For blocks that can be streamed, this allows us to reduce the latency until we produce -@@ -1181,7 +1270,8 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx - { - case bt_compressed: - DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); -- rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming); -+ assert(dctx->isFrameDecompression == 1); -+ rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, is_streaming); - dctx->expected = 0; /* Streaming not supported */ - break; - case bt_raw : -@@ -1250,6 +1340,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx - case ZSTDds_decodeSkippableHeader: - assert(src != NULL); - assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); -+ assert(dctx->format != ZSTD_f_zstd1_magicless); - ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ - dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ - dctx->stage = ZSTDds_skipFrame; -@@ -1262,7 +1353,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx - - default: - assert(0); /* impossible */ -- RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ -+ RETURN_ERROR(GENERIC, "impossible to reach"); /* some compilers require default to do something */ - } - } - -@@ -1303,11 +1394,11 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* - /* in minimal huffman, we always use X1 variants */ - size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, - dictPtr, dictEnd - dictPtr, -- workspace, workspaceSize); -+ workspace, workspaceSize, /* flags */ 0); - #else - size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, - dictPtr, (size_t)(dictEnd - dictPtr), -- workspace, workspaceSize); -+ workspace, workspaceSize, /* flags */ 0); - #endif - RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); - dictPtr += hSize; -@@ -1403,10 +1494,11 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* d - dctx->prefixStart = NULL; - dctx->virtualStart = NULL; - dctx->dictEnd = NULL; -- dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ -+ dctx->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */ - dctx->litEntropy = dctx->fseEntropy = 0; - dctx->dictID = 0; - dctx->bType = bt_reserved; -+ dctx->isFrameDecompression = 1; - ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); - ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ - dctx->LLTptr = dctx->entropy.LLTable; -@@ -1465,7 +1557,7 @@ unsigned ZSTD_getDictID_fromDict(const v - * This could for one of the following reasons : - * - The frame does not require a dictionary 
(most common case). - * - The frame was built with dictID intentionally removed. -- * Needed dictionary is a hidden information. -+ * Needed dictionary is a hidden piece of information. - * Note : this use case also happens when using a non-conformant dictionary. - * - `srcSize` is too small, and as a result, frame header could not be decoded. - * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. -@@ -1474,7 +1566,7 @@ unsigned ZSTD_getDictID_fromDict(const v - * ZSTD_getFrameHeader(), which will provide a more precise error code. */ - unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) - { -- ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; -+ ZSTD_FrameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0, 0, 0 }; - size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); - if (ZSTD_isError(hError)) return 0; - return zfp.dictID; -@@ -1581,7 +1673,9 @@ size_t ZSTD_initDStream_usingDict(ZSTD_D - size_t ZSTD_initDStream(ZSTD_DStream* zds) - { - DEBUGLOG(4, "ZSTD_initDStream"); -- return ZSTD_initDStream_usingDDict(zds, NULL); -+ FORWARD_IF_ERROR(ZSTD_DCtx_reset(zds, ZSTD_reset_session_only), ""); -+ FORWARD_IF_ERROR(ZSTD_DCtx_refDDict(zds, NULL), ""); -+ return ZSTD_startingInputLength(zds->format); - } - - /* ZSTD_initDStream_usingDDict() : -@@ -1589,6 +1683,7 @@ size_t ZSTD_initDStream(ZSTD_DStream* zd - * this function cannot fail */ - size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) - { -+ DEBUGLOG(4, "ZSTD_initDStream_usingDDict"); - FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); - return ZSTD_startingInputLength(dctx->format); -@@ -1599,6 +1694,7 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_ - * this function cannot fail */ - size_t ZSTD_resetDStream(ZSTD_DStream* dctx) - { -+ DEBUGLOG(4, "ZSTD_resetDStream"); - FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); - return ZSTD_startingInputLength(dctx->format); - } -@@ -1670,6 +1766,15 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_d - bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict; - bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts; - return bounds; -+ case ZSTD_d_disableHuffmanAssembly: -+ bounds.lowerBound = 0; -+ bounds.upperBound = 1; -+ return bounds; -+ case ZSTD_d_maxBlockSize: -+ bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN; -+ bounds.upperBound = ZSTD_BLOCKSIZE_MAX; -+ return bounds; -+ - default:; - } - bounds.error = ERROR(parameter_unsupported); -@@ -1710,6 +1815,12 @@ size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* - case ZSTD_d_refMultipleDDicts: - *value = (int)dctx->refMultipleDDicts; - return 0; -+ case ZSTD_d_disableHuffmanAssembly: -+ *value = (int)dctx->disableHufAsm; -+ return 0; -+ case ZSTD_d_maxBlockSize: -+ *value = dctx->maxBlockSizeParam; -+ return 0; - default:; - } - RETURN_ERROR(parameter_unsupported, ""); -@@ -1743,6 +1854,14 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* - } - dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value; - return 0; -+ case ZSTD_d_disableHuffmanAssembly: -+ CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value); -+ dctx->disableHufAsm = value != 0; -+ return 0; -+ case ZSTD_d_maxBlockSize: -+ if (value != 0) CHECK_DBOUNDS(ZSTD_d_maxBlockSize, value); -+ dctx->maxBlockSizeParam = value; -+ return 0; - default:; - } - RETURN_ERROR(parameter_unsupported, ""); -@@ -1754,6 +1873,7 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, - || (reset == ZSTD_reset_session_and_parameters) ) { - dctx->streamStage = zdss_init; - dctx->noForwardProgress = 
0; -+ dctx->isFrameDecompression = 1; - } - if ( (reset == ZSTD_reset_parameters) - || (reset == ZSTD_reset_session_and_parameters) ) { -@@ -1770,11 +1890,17 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DS - return ZSTD_sizeof_DCtx(dctx); - } - --size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) -+static size_t ZSTD_decodingBufferSize_internal(unsigned long long windowSize, unsigned long long frameContentSize, size_t blockSizeMax) - { -- size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); -- /* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/ -- unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2); -+ size_t const blockSize = MIN((size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX), blockSizeMax); -+ /* We need blockSize + WILDCOPY_OVERLENGTH worth of buffer so that if a block -+ * ends at windowSize + WILDCOPY_OVERLENGTH + 1 bytes, we can start writing -+ * the block at the beginning of the output buffer, and maintain a full window. -+ * -+ * We need another blockSize worth of buffer so that we can store split -+ * literals at the end of the block without overwriting the extDict window. -+ */ -+ unsigned long long const neededRBSize = windowSize + (blockSize * 2) + (WILDCOPY_OVERLENGTH * 2); - unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); - size_t const minRBSize = (size_t) neededSize; - RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, -@@ -1782,6 +1908,11 @@ size_t ZSTD_decodingBufferSize_min(unsig - return minRBSize; - } - -+size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) -+{ -+ return ZSTD_decodingBufferSize_internal(windowSize, frameContentSize, ZSTD_BLOCKSIZE_MAX); -+} -+ - size_t ZSTD_estimateDStreamSize(size_t windowSize) - { - size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); -@@ -1793,7 +1924,7 @@ size_t ZSTD_estimateDStreamSize(size_t w - size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) - { - U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */ -- ZSTD_frameHeader zfh; -+ ZSTD_FrameHeader zfh; - size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); - if (ZSTD_isError(err)) return err; - RETURN_ERROR_IF(err>0, srcSize_wrong, ""); -@@ -1888,6 +2019,7 @@ size_t ZSTD_decompressStream(ZSTD_DStrea - U32 someMoreWork = 1; - - DEBUGLOG(5, "ZSTD_decompressStream"); -+ assert(zds != NULL); - RETURN_ERROR_IF( - input->pos > input->size, - srcSize_wrong, -@@ -1918,7 +2050,6 @@ size_t ZSTD_decompressStream(ZSTD_DStrea - if (zds->refMultipleDDicts && zds->ddictSet) { - ZSTD_DCtx_selectFrameDDict(zds); - } -- DEBUGLOG(5, "header size : %u", (U32)hSize); - if (ZSTD_isError(hSize)) { - return hSize; /* error */ - } -@@ -1932,6 +2063,11 @@ size_t ZSTD_decompressStream(ZSTD_DStrea - zds->lhSize += remainingInput; - } - input->pos = input->size; -+ /* check first few bytes */ -+ FORWARD_IF_ERROR( -+ ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format), -+ "First few bytes detected incorrect" ); -+ /* return hint input size */ - return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ - } - assert(ip != 
NULL); -@@ -1943,14 +2079,15 @@ size_t ZSTD_decompressStream(ZSTD_DStrea - if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN - && zds->fParams.frameType != ZSTD_skippableFrame - && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { -- size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart)); -+ size_t const cSize = ZSTD_findFrameCompressedSize_advanced(istart, (size_t)(iend-istart), zds->format); - if (cSize <= (size_t)(iend-istart)) { - /* shortcut : using single-pass mode */ - size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); - if (ZSTD_isError(decompressedSize)) return decompressedSize; -- DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") -+ DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()"); -+ assert(istart != NULL); - ip = istart + cSize; -- op += decompressedSize; -+ op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */ - zds->expected = 0; - zds->streamStage = zdss_init; - someMoreWork = 0; -@@ -1969,7 +2106,8 @@ size_t ZSTD_decompressStream(ZSTD_DStrea - DEBUGLOG(4, "Consume header"); - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); - -- if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ -+ if (zds->format == ZSTD_f_zstd1 -+ && (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ - zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); - zds->stage = ZSTDds_skipFrame; - } else { -@@ -1985,11 +2123,13 @@ size_t ZSTD_decompressStream(ZSTD_DStrea - zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); - RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, - frameParameter_windowTooLarge, ""); -+ if (zds->maxBlockSizeParam != 0) -+ zds->fParams.blockSizeMax = MIN(zds->fParams.blockSizeMax, (unsigned)zds->maxBlockSizeParam); - - /* Adapt buffer sizes to frame header instructions */ - { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); - size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered -- ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) -+ ? ZSTD_decodingBufferSize_internal(zds->fParams.windowSize, zds->fParams.frameContentSize, zds->fParams.blockSizeMax) - : 0; - - ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); -@@ -2034,6 +2174,7 @@ size_t ZSTD_decompressStream(ZSTD_DStrea - } - if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ - FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); -+ assert(ip != NULL); - ip += neededInSize; - /* Function modifies the stage so we must break */ - break; -@@ -2048,7 +2189,7 @@ size_t ZSTD_decompressStream(ZSTD_DStrea - int const isSkipFrame = ZSTD_isSkipFrame(zds); - size_t loadedSize; - /* At this point we shouldn't be decompressing a block that we can stream. 
*/ -- assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip)); -+ assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip))); - if (isSkipFrame) { - loadedSize = MIN(toLoad, (size_t)(iend-ip)); - } else { -@@ -2057,8 +2198,11 @@ size_t ZSTD_decompressStream(ZSTD_DStrea - "should never happen"); - loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); - } -- ip += loadedSize; -- zds->inPos += loadedSize; -+ if (loadedSize != 0) { -+ /* ip may be NULL */ -+ ip += loadedSize; -+ zds->inPos += loadedSize; -+ } - if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ - - /* decode loaded input */ -@@ -2068,14 +2212,17 @@ size_t ZSTD_decompressStream(ZSTD_DStrea - break; - } - case zdss_flush: -- { size_t const toFlushSize = zds->outEnd - zds->outStart; -+ { -+ size_t const toFlushSize = zds->outEnd - zds->outStart; - size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize); -- op += flushedSize; -+ -+ op = op ? op + flushedSize : op; -+ - zds->outStart += flushedSize; - if (flushedSize == toFlushSize) { /* flush completed */ - zds->streamStage = zdss_read; - if ( (zds->outBuffSize < zds->fParams.frameContentSize) -- && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { -+ && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { - DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", - (int)(zds->outBuffSize - zds->outStart), - (U32)zds->fParams.blockSizeMax); -@@ -2089,7 +2236,7 @@ size_t ZSTD_decompressStream(ZSTD_DStrea - - default: - assert(0); /* impossible */ -- RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ -+ RETURN_ERROR(GENERIC, "impossible to reach"); /* some compilers require default to do something */ - } } - - /* result */ -@@ -2102,8 +2249,8 @@ size_t ZSTD_decompressStream(ZSTD_DStrea - if ((ip==istart) && (op==ostart)) { /* no forward progress */ - zds->noForwardProgress ++; - if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { -- RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); -- RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); -+ RETURN_ERROR_IF(op==oend, noForwardProgress_destFull, ""); -+ RETURN_ERROR_IF(ip==iend, noForwardProgress_inputEmpty, ""); - assert(0); - } - } else { -@@ -2140,11 +2287,17 @@ size_t ZSTD_decompressStream_simpleArgs - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos) - { -- ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; -- ZSTD_inBuffer input = { src, srcSize, *srcPos }; -- /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ -- size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); -- *dstPos = output.pos; -- *srcPos = input.pos; -- return cErr; -+ ZSTD_outBuffer output; -+ ZSTD_inBuffer input; -+ output.dst = dst; -+ output.size = dstCapacity; -+ output.pos = *dstPos; -+ input.src = src; -+ input.size = srcSize; -+ input.pos = *srcPos; -+ { size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); -+ *dstPos = output.pos; -+ *srcPos = input.pos; -+ return cErr; -+ } - } ---- a/lib/zstd/decompress/zstd_decompress_block.c -+++ b/lib/zstd/decompress/zstd_decompress_block.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -20,12 +21,12 @@ - #include "../common/mem.h" /* low level memory routines */ - #define FSE_STATIC_LINKING_ONLY - #include "../common/fse.h" --#define HUF_STATIC_LINKING_ONLY - #include "../common/huf.h" - #include "../common/zstd_internal.h" - #include "zstd_decompress_internal.h" /* ZSTD_DCtx */ - #include "zstd_ddict.h" /* ZSTD_DDictDictContent */ - #include "zstd_decompress_block.h" -+#include "../common/bits.h" /* ZSTD_highbit32 */ - - /*_******************************************************* - * Macros -@@ -51,6 +52,13 @@ static void ZSTD_copy4(void* dst, const - * Block decoding - ***************************************************************/ - -+static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx) -+{ -+ size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX; -+ assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX); -+ return blockSizeMax; -+} -+ - /*! ZSTD_getcBlockSize() : - * Provides the size of compressed block from block header `src` */ - size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, -@@ -73,41 +81,49 @@ size_t ZSTD_getcBlockSize(const void* sr - static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize, - const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately) - { -- if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) -- { -- /* room for litbuffer to fit without read faulting */ -- dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH; -+ size_t const blockSizeMax = ZSTD_blockSizeMax(dctx); -+ assert(litSize <= blockSizeMax); -+ assert(dctx->isFrameDecompression || streaming == not_streaming); -+ assert(expectedWriteSize <= blockSizeMax); -+ if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) { -+ /* If we aren't streaming, we can just put the literals after the output -+ * of the current block. We don't need to worry about overwriting the -+ * extDict of our window, because it doesn't exist. -+ * So if we have space after the end of the block, just put it there. -+ */ -+ dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH; - dctx->litBufferEnd = dctx->litBuffer + litSize; - dctx->litBufferLocation = ZSTD_in_dst; -- } -- else if (litSize > ZSTD_LITBUFFEREXTRASIZE) -- { -- /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ -+ } else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) { -+ /* Literals fit entirely within the extra buffer, put them there to avoid -+ * having to split the literals. -+ */ -+ dctx->litBuffer = dctx->litExtraBuffer; -+ dctx->litBufferEnd = dctx->litBuffer + litSize; -+ dctx->litBufferLocation = ZSTD_not_in_dst; -+ } else { -+ assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE); -+ /* Literals must be split between the output block and the extra lit -+ * buffer. We fill the extra lit buffer with the tail of the literals, -+ * and put the rest of the literals at the end of the block, with -+ * WILDCOPY_OVERLENGTH of buffer room to allow for overreads. -+ * This MUST not write more than our maxBlockSize beyond dst, because in -+ * streaming mode, that could overwrite part of our extDict window. 
-+ */ - if (splitImmediately) { - /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ - dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; - dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE; -- } -- else { -- /* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */ -+ } else { -+ /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */ - dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize; - dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize; - } - dctx->litBufferLocation = ZSTD_split; -- } -- else -- { -- /* fits entirely within litExtraBuffer, so no split is necessary */ -- dctx->litBuffer = dctx->litExtraBuffer; -- dctx->litBufferEnd = dctx->litBuffer + litSize; -- dctx->litBufferLocation = ZSTD_not_in_dst; -+ assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize); - } - } - --/* Hidden declaration for fullbench */ --size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, -- const void* src, size_t srcSize, -- void* dst, size_t dstCapacity, const streaming_operation streaming); - /*! ZSTD_decodeLiteralsBlock() : - * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored - * in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current -@@ -116,7 +132,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt - * - * @return : nb of bytes read from src (< srcSize ) - * note : symbol not declared but exposed for fullbench */ --size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, -+static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */ - void* dst, size_t dstCapacity, const streaming_operation streaming) - { -@@ -124,7 +140,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt - RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); - - { const BYTE* const istart = (const BYTE*) src; -- symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); -+ SymbolEncodingType_e const litEncType = (SymbolEncodingType_e)(istart[0] & 3); -+ size_t const blockSizeMax = ZSTD_blockSizeMax(dctx); - - switch(litEncType) - { -@@ -134,13 +151,16 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt - ZSTD_FALLTHROUGH; - - case set_compressed: -- RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); -+ RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3"); - { size_t lhSize, litSize, litCSize; - U32 singleStream=0; - U32 const lhlCode = (istart[0] >> 2) & 3; - U32 const lhc = MEM_readLE32(istart); - size_t hufSuccess; -- size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); -+ size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); -+ int const flags = 0 -+ | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0) -+ | (dctx->disableHufAsm ? 
HUF_flags_disableAsm : 0); - switch(lhlCode) - { - case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ -@@ -164,7 +184,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt - break; - } - RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); -- RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); -+ RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); -+ if (!singleStream) -+ RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong, -+ "Not enough literals (%zu) for the 4-streams mode (min %u)", -+ litSize, MIN_LITERALS_FOR_4_STREAMS); - RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); - RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, ""); - ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0); -@@ -176,13 +200,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt - - if (litEncType==set_repeat) { - if (singleStream) { -- hufSuccess = HUF_decompress1X_usingDTable_bmi2( -+ hufSuccess = HUF_decompress1X_usingDTable( - dctx->litBuffer, litSize, istart+lhSize, litCSize, -- dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); -+ dctx->HUFptr, flags); - } else { -- hufSuccess = HUF_decompress4X_usingDTable_bmi2( -+ assert(litSize >= MIN_LITERALS_FOR_4_STREAMS); -+ hufSuccess = HUF_decompress4X_usingDTable( - dctx->litBuffer, litSize, istart+lhSize, litCSize, -- dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); -+ dctx->HUFptr, flags); - } - } else { - if (singleStream) { -@@ -190,26 +215,28 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt - hufSuccess = HUF_decompress1X_DCtx_wksp( - dctx->entropy.hufTable, dctx->litBuffer, litSize, - istart+lhSize, litCSize, dctx->workspace, -- sizeof(dctx->workspace)); -+ sizeof(dctx->workspace), flags); - #else -- hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( -+ hufSuccess = HUF_decompress1X1_DCtx_wksp( - dctx->entropy.hufTable, dctx->litBuffer, litSize, - istart+lhSize, litCSize, dctx->workspace, -- sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); -+ sizeof(dctx->workspace), flags); - #endif - } else { -- hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( -+ hufSuccess = HUF_decompress4X_hufOnly_wksp( - dctx->entropy.hufTable, dctx->litBuffer, litSize, - istart+lhSize, litCSize, dctx->workspace, -- sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); -+ sizeof(dctx->workspace), flags); - } - } - if (dctx->litBufferLocation == ZSTD_split) - { -+ assert(litSize > ZSTD_LITBUFFEREXTRASIZE); - ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE); - ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE); - dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; - dctx->litBufferEnd -= WILDCOPY_OVERLENGTH; -+ assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax); - } - - RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); -@@ -224,7 +251,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt - case set_basic: - { size_t litSize, lhSize; - U32 const lhlCode = ((istart[0]) >> 2) & 3; -- size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); -+ size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); - switch(lhlCode) - { - case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ -@@ -237,11 +264,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt - break; - case 3: - lhSize = 3; -+ RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize 
>= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3"); - litSize = MEM_readLE24(istart) >> 4; - break; - } - - RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); -+ RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); - RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); - ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1); - if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ -@@ -270,7 +299,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt - case set_rle: - { U32 const lhlCode = ((istart[0]) >> 2) & 3; - size_t litSize, lhSize; -- size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); -+ size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); - switch(lhlCode) - { - case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ -@@ -279,16 +308,17 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt - break; - case 1: - lhSize = 2; -+ RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3"); - litSize = MEM_readLE16(istart) >> 4; - break; - case 3: - lhSize = 3; -+ RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4"); - litSize = MEM_readLE24(istart) >> 4; -- RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); - break; - } - RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); -- RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); -+ RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); - RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); - ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1); - if (dctx->litBufferLocation == ZSTD_split) -@@ -310,6 +340,18 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt - } - } - -+/* Hidden declaration for fullbench */ -+size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx, -+ const void* src, size_t srcSize, -+ void* dst, size_t dstCapacity); -+size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx, -+ const void* src, size_t srcSize, -+ void* dst, size_t dstCapacity) -+{ -+ dctx->isFrameDecompression = 0; -+ return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming); -+} -+ - /* Default FSE distribution tables. - * These are pre-calculated FSE decoding tables using default distributions as defined in specification : - * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions -@@ -317,7 +359,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt - * - start from default distributions, present in /lib/common/zstd_internal.h - * - generate tables normally, using ZSTD_buildFSETable() - * - printout the content of tables -- * - pretify output, report below, test with fuzzer to ensure it's correct */ -+ * - prettify output, report below, test with fuzzer to ensure it's correct */ - - /* Default FSE distribution table for Literal Lengths */ - static const ZSTD_seqSymbol LL_defaultDTable[(1<=0); -+ pos += (size_t)n; - } - } - /* Now we spread those positions across the table. -- * The benefit of doing it in two stages is that we avoid the the -+ * The benefit of doing it in two stages is that we avoid the - * variable size inner loop, which caused lots of branch misses. - * Now we can run through all the positions without any branch misses. 
-- * We unroll the loop twice, since that is what emperically worked best. -+ * We unroll the loop twice, since that is what empirically worked best. - */ - { - size_t position = 0; -@@ -540,7 +583,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSym - for (i=0; i highThreshold) position = (position + step) & tableMask; /* lowprob area */ -+ while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; /* lowprob area */ - } } - assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ - } -@@ -551,7 +594,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSym - for (u=0; u 0x7F) { - if (nbSeq == 0xFF) { - RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); -@@ -681,11 +719,19 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* - } - *nbSeqPtr = nbSeq; - -+ if (nbSeq == 0) { -+ /* No sequence : section ends immediately */ -+ RETURN_ERROR_IF(ip != iend, corruption_detected, -+ "extraneous data present in the Sequences section"); -+ return (size_t)(ip - istart); -+ } -+ - /* FSE table descriptors */ - RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ -- { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); -- symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); -- symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); -+ RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. */ -+ { SymbolEncodingType_e const LLtype = (SymbolEncodingType_e)(*ip >> 6); -+ SymbolEncodingType_e const OFtype = (SymbolEncodingType_e)((*ip >> 4) & 3); -+ SymbolEncodingType_e const MLtype = (SymbolEncodingType_e)((*ip >> 2) & 3); - ip++; - - /* Build DTables */ -@@ -829,7 +875,7 @@ static void ZSTD_safecopy(BYTE* op, cons - /* ZSTD_safecopyDstBeforeSrc(): - * This version allows overlap with dst before src, or handles the non-overlap case with dst after src - * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */ --static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) { -+static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) { - ptrdiff_t const diff = op - ip; - BYTE* const oend = op + length; - -@@ -858,6 +904,7 @@ static void ZSTD_safecopyDstBeforeSrc(BY - * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). - */ - FORCE_NOINLINE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_execSequenceEnd(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, -@@ -905,6 +952,7 @@ size_t ZSTD_execSequenceEnd(BYTE* op, - * This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case. 
- */ - FORCE_NOINLINE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op, - BYTE* const oend, const BYTE* const oend_w, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, -@@ -950,6 +998,7 @@ size_t ZSTD_execSequenceEndSplitLitBuffe - } - - HINT_INLINE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_execSequence(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, -@@ -964,6 +1013,11 @@ size_t ZSTD_execSequence(BYTE* op, - - assert(op != NULL /* Precondition */); - assert(oend_w < oend /* No underflow */); -+ -+#if defined(__aarch64__) -+ /* prefetch sequence starting from match that will be used for copy later */ -+ PREFETCH_L1(match); -+#endif - /* Handle edge cases in a slow path: - * - Read beyond end of literals - * - Match end is within WILDCOPY_OVERLIMIT of oend -@@ -1043,6 +1097,7 @@ size_t ZSTD_execSequence(BYTE* op, - } - - HINT_INLINE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op, - BYTE* const oend, const BYTE* const oend_w, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, -@@ -1154,7 +1209,7 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseSta - } - - /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum -- * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) -+ * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32 - * bits before reloading. This value is the maximum number of bytes we read - * after reloading when we are decoding long offsets. - */ -@@ -1165,13 +1220,37 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseSta - - typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; - -+/* -+ * ZSTD_decodeSequence(): -+ * @p longOffsets : tells the decoder to reload more bit while decoding large offsets -+ * only used in 32-bit mode -+ * @return : Sequence (litL + matchL + offset) -+ */ - FORCE_INLINE_TEMPLATE seq_t --ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) -+ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq) - { - seq_t seq; -+ /* -+ * ZSTD_seqSymbol is a 64 bits wide structure. -+ * It can be loaded in one operation -+ * and its fields extracted by simply shifting or bit-extracting on aarch64. -+ * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh -+ * operations that cause performance drop. This can be avoided by using this -+ * ZSTD_memcpy hack. 
-+ */ -+#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__)) -+ ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS; -+ ZSTD_seqSymbol* const llDInfo = &llDInfoS; -+ ZSTD_seqSymbol* const mlDInfo = &mlDInfoS; -+ ZSTD_seqSymbol* const ofDInfo = &ofDInfoS; -+ ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol)); -+ ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol)); -+ ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol)); -+#else - const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state; - const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state; - const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state; -+#endif - seq.matchLength = mlDInfo->baseValue; - seq.litLength = llDInfo->baseValue; - { U32 const ofBase = ofDInfo->baseValue; -@@ -1186,28 +1265,31 @@ ZSTD_decodeSequence(seqState_t* seqState - U32 const llnbBits = llDInfo->nbBits; - U32 const mlnbBits = mlDInfo->nbBits; - U32 const ofnbBits = ofDInfo->nbBits; -+ -+ assert(llBits <= MaxLLBits); -+ assert(mlBits <= MaxMLBits); -+ assert(ofBits <= MaxOff); - /* - * As gcc has better branch and block analyzers, sometimes it is only -- * valuable to mark likelyness for clang, it gives around 3-4% of -+ * valuable to mark likeliness for clang, it gives around 3-4% of - * performance. - */ - - /* sequence */ - { size_t offset; -- #if defined(__clang__) -- if (LIKELY(ofBits > 1)) { -- #else - if (ofBits > 1) { -- #endif - ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); - ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); -- assert(ofBits <= MaxOff); -+ ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32); -+ ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits); - if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { -- U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); -+ /* Always read extra bits, this keeps the logic simple, -+ * avoids branches, and avoids accidentally reading 0 bits. -+ */ -+ U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32; - offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); - BIT_reloadDStream(&seqState->DStream); -- if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); -- assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ -+ offset += BIT_readBitsFast(&seqState->DStream, extraBits); - } else { - offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); -@@ -1224,7 +1306,7 @@ ZSTD_decodeSequence(seqState_t* seqState - } else { - offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); - { size_t temp = (offset==3) ? 
seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; -- temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ -+ temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */ - if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; -@@ -1232,11 +1314,7 @@ ZSTD_decodeSequence(seqState_t* seqState - seq.offset = offset; - } - -- #if defined(__clang__) -- if (UNLIKELY(mlBits > 0)) -- #else - if (mlBits > 0) -- #endif - seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); - - if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) -@@ -1246,11 +1324,7 @@ ZSTD_decodeSequence(seqState_t* seqState - /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ - ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); - -- #if defined(__clang__) -- if (UNLIKELY(llBits > 0)) -- #else - if (llBits > 0) -- #endif - seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); - - if (MEM_32bits()) -@@ -1259,17 +1333,22 @@ ZSTD_decodeSequence(seqState_t* seqState - DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", - (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - -- ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */ -- ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */ -- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ -- ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */ -+ if (!isLastSeq) { -+ /* don't update FSE state for last Sequence */ -+ ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */ -+ ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */ -+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ -+ ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */ -+ BIT_reloadDStream(&seqState->DStream); -+ } - } - - return seq; - } - --#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION --MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) -+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) -+#if DEBUGLEVEL >= 1 -+static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) - { - size_t const windowSize = dctx->fParams.windowSize; - /* No dictionary used. */ -@@ -1283,30 +1362,33 @@ MEM_STATIC int ZSTD_dictionaryIsActive(Z - /* Dictionary is active. 
*/ - return 1; - } -+#endif - --MEM_STATIC void ZSTD_assertValidSequence( -+static void ZSTD_assertValidSequence( - ZSTD_DCtx const* dctx, - BYTE const* op, BYTE const* oend, - seq_t const seq, - BYTE const* prefixStart, BYTE const* virtualStart) - { - #if DEBUGLEVEL >= 1 -- size_t const windowSize = dctx->fParams.windowSize; -- size_t const sequenceSize = seq.litLength + seq.matchLength; -- BYTE const* const oLitEnd = op + seq.litLength; -- DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", -- (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); -- assert(op <= oend); -- assert((size_t)(oend - op) >= sequenceSize); -- assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); -- if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { -- size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); -- /* Offset must be within the dictionary. */ -- assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); -- assert(seq.offset <= windowSize + dictSize); -- } else { -- /* Offset must be within our window. */ -- assert(seq.offset <= windowSize); -+ if (dctx->isFrameDecompression) { -+ size_t const windowSize = dctx->fParams.windowSize; -+ size_t const sequenceSize = seq.litLength + seq.matchLength; -+ BYTE const* const oLitEnd = op + seq.litLength; -+ DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", -+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); -+ assert(op <= oend); -+ assert((size_t)(oend - op) >= sequenceSize); -+ assert(sequenceSize <= ZSTD_blockSizeMax(dctx)); -+ if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { -+ size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); -+ /* Offset must be within the dictionary. */ -+ assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); -+ assert(seq.offset <= windowSize + dictSize); -+ } else { -+ /* Offset must be within our window. 
*/ -+ assert(seq.offset <= windowSize); -+ } - } - #else - (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart; -@@ -1322,23 +1404,21 @@ DONT_VECTORIZE - ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { - const BYTE* ip = (const BYTE*)seqStart; - const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE*)dst; -- BYTE* const oend = ostart + maxDstSize; -+ BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize); - BYTE* op = ostart; - const BYTE* litPtr = dctx->litPtr; - const BYTE* litBufferEnd = dctx->litBufferEnd; - const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); - const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); - const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); -- DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer"); -- (void)frame; -+ DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq); - -- /* Regen sequences */ -+ /* Literals are split between internal buffer & output buffer */ - if (nbSeq) { - seqState_t seqState; - dctx->fseEntropy = 1; -@@ -1357,8 +1437,7 @@ ZSTD_decompressSequences_bodySplitLitBuf - BIT_DStream_completed < BIT_DStream_overflow); - - /* decompress without overrunning litPtr begins */ -- { -- seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset); -+ { seq_t sequence = {0,0,0}; /* some static analyzer believe that @sequence is not initialized (it necessarily is, since for(;;) loop as at least one iteration) */ - /* Align the decompression loop to 32 + 16 bytes. - * - * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression -@@ -1420,27 +1499,26 @@ ZSTD_decompressSequences_bodySplitLitBuf - #endif - - /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */ -- for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) { -- size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); -+ for ( ; nbSeq; nbSeq--) { -+ sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); -+ if (litPtr + sequence.litLength > dctx->litBufferEnd) break; -+ { size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); - #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) -- assert(!ZSTD_isError(oneSeqSize)); -- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); -+ assert(!ZSTD_isError(oneSeqSize)); -+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); - #endif -- if (UNLIKELY(ZSTD_isError(oneSeqSize))) -- return oneSeqSize; -- DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); -- op += oneSeqSize; -- if (UNLIKELY(!--nbSeq)) -- break; -- BIT_reloadDStream(&(seqState.DStream)); -- sequence = ZSTD_decodeSequence(&seqState, isLongOffset); -- } -+ if (UNLIKELY(ZSTD_isError(oneSeqSize))) -+ return oneSeqSize; -+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); -+ op += oneSeqSize; -+ } } -+ DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)"); - - /* If there are more sequences, they will need to read literals from 
litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */ - if (nbSeq > 0) { - const size_t leftoverLit = dctx->litBufferEnd - litPtr; -- if (leftoverLit) -- { -+ DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength); -+ if (leftoverLit) { - RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); - ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); - sequence.litLength -= leftoverLit; -@@ -1449,24 +1527,22 @@ ZSTD_decompressSequences_bodySplitLitBuf - litPtr = dctx->litExtraBuffer; - litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; - dctx->litBufferLocation = ZSTD_not_in_dst; -- { -- size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); -+ { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); - #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); -- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); -+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); - #endif - if (UNLIKELY(ZSTD_isError(oneSeqSize))) - return oneSeqSize; - DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); - op += oneSeqSize; -- if (--nbSeq) -- BIT_reloadDStream(&(seqState.DStream)); - } -+ nbSeq--; - } - } - -- if (nbSeq > 0) /* there is remaining lit from extra buffer */ -- { -+ if (nbSeq > 0) { -+ /* there is remaining lit from extra buffer */ - - #if defined(__x86_64__) - __asm__(".p2align 6"); -@@ -1485,35 +1561,34 @@ ZSTD_decompressSequences_bodySplitLitBuf - # endif - #endif - -- for (; ; ) { -- seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); -+ for ( ; nbSeq ; nbSeq--) { -+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); - #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); -- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); -+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); - #endif - if (UNLIKELY(ZSTD_isError(oneSeqSize))) - return oneSeqSize; - DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); - op += oneSeqSize; -- if (UNLIKELY(!--nbSeq)) -- break; -- BIT_reloadDStream(&(seqState.DStream)); - } - } - - /* check if reached exact end */ - DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq); - RETURN_ERROR_IF(nbSeq, corruption_detected, ""); -- RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); -+ DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed); -+ RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); - /* save reps for next block */ - { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } - } - - /* last literal segment */ -- if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */ -- { -- size_t const lastLLSize = litBufferEnd - litPtr; -+ if (dctx->litBufferLocation == 
ZSTD_split) { -+ /* split hasn't been reached yet, first get dst then copy litExtraBuffer */ -+ size_t const lastLLSize = (size_t)(litBufferEnd - litPtr); -+ DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize); - RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, ""); - if (op != NULL) { - ZSTD_memmove(op, litPtr, lastLLSize); -@@ -1523,15 +1598,17 @@ ZSTD_decompressSequences_bodySplitLitBuf - litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; - dctx->litBufferLocation = ZSTD_not_in_dst; - } -- { size_t const lastLLSize = litBufferEnd - litPtr; -+ /* copy last literals from internal buffer */ -+ { size_t const lastLLSize = (size_t)(litBufferEnd - litPtr); -+ DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize); - RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); - if (op != NULL) { - ZSTD_memcpy(op, litPtr, lastLLSize); - op += lastLLSize; -- } -- } -+ } } - -- return op-ostart; -+ DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart)); -+ return (size_t)(op - ostart); - } - - FORCE_INLINE_TEMPLATE size_t -@@ -1539,21 +1616,19 @@ DONT_VECTORIZE - ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { - const BYTE* ip = (const BYTE*)seqStart; - const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE*)dst; -- BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer; -+ BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer; - BYTE* op = ostart; - const BYTE* litPtr = dctx->litPtr; - const BYTE* const litEnd = litPtr + dctx->litSize; - const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart); - const BYTE* const vBase = (const BYTE*)(dctx->virtualStart); - const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd); -- DEBUGLOG(5, "ZSTD_decompressSequences_body"); -- (void)frame; -+ DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq); - - /* Regen sequences */ - if (nbSeq) { -@@ -1568,11 +1643,6 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* - ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); - assert(dst != NULL); - -- ZSTD_STATIC_ASSERT( -- BIT_DStream_unfinished < BIT_DStream_completed && -- BIT_DStream_endOfBuffer < BIT_DStream_completed && -- BIT_DStream_completed < BIT_DStream_overflow); -- - #if defined(__x86_64__) - __asm__(".p2align 6"); - __asm__("nop"); -@@ -1587,73 +1657,70 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* - # endif - #endif - -- for ( ; ; ) { -- seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); -+ for ( ; nbSeq ; nbSeq--) { -+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); - #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); -- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); -+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); - #endif - if (UNLIKELY(ZSTD_isError(oneSeqSize))) - return oneSeqSize; - DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); - op += oneSeqSize; -- if (UNLIKELY(!--nbSeq)) -- break; -- 
BIT_reloadDStream(&(seqState.DStream)); - } - - /* check if reached exact end */ -- DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); -- RETURN_ERROR_IF(nbSeq, corruption_detected, ""); -- RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); -+ assert(nbSeq == 0); -+ RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); - /* save reps for next block */ - { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } - } - - /* last literal segment */ -- { size_t const lastLLSize = litEnd - litPtr; -+ { size_t const lastLLSize = (size_t)(litEnd - litPtr); -+ DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize); - RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); - if (op != NULL) { - ZSTD_memcpy(op, litPtr, lastLLSize); - op += lastLLSize; -- } -- } -+ } } - -- return op-ostart; -+ DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart)); -+ return (size_t)(op - ostart); - } - - static size_t - ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { -- return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - - static size_t - ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { -- return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ - - #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT - --FORCE_INLINE_TEMPLATE size_t --ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence, -+FORCE_INLINE_TEMPLATE -+ -+size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence, - const BYTE* const prefixStart, const BYTE* const dictEnd) - { - prefetchPos += sequence.litLength; - { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart; -- const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. -- * No consequence though : memory address is only used for prefetching, not for dereferencing */ -+ /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. 
-+ * No consequence though : memory address is only used for prefetching, not for dereferencing */ -+ const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset); - PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ - } - return prefetchPos + sequence.matchLength; -@@ -1668,20 +1735,18 @@ ZSTD_decompressSequencesLong_body( - ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { - const BYTE* ip = (const BYTE*)seqStart; - const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE*)dst; -- BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize; -+ BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize); - BYTE* op = ostart; - const BYTE* litPtr = dctx->litPtr; - const BYTE* litBufferEnd = dctx->litBufferEnd; - const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); - const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); - const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); -- (void)frame; - - /* Regen sequences */ - if (nbSeq) { -@@ -1706,20 +1771,17 @@ ZSTD_decompressSequencesLong_body( - ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); - - /* prepare in advance */ -- for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNblitBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) -- { -+ if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) { - /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */ - const size_t leftoverLit = dctx->litBufferEnd - litPtr; - if (leftoverLit) -@@ -1732,26 +1794,26 @@ ZSTD_decompressSequencesLong_body( - litPtr = dctx->litExtraBuffer; - litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; - dctx->litBufferLocation = ZSTD_not_in_dst; -- oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); -+ { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); - #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) -- assert(!ZSTD_isError(oneSeqSize)); -- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); -+ assert(!ZSTD_isError(oneSeqSize)); -+ ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); - #endif -- if (ZSTD_isError(oneSeqSize)) return oneSeqSize; -+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - -- prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); -- sequences[seqNb & STORED_SEQS_MASK] = sequence; -- op += oneSeqSize; -- } -+ prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); -+ sequences[seqNb & STORED_SEQS_MASK] = sequence; -+ op += oneSeqSize; -+ } } - else - { - /* lit buffer is either wholly contained in first or 
second split, or not split at all*/ -- oneSeqSize = dctx->litBufferLocation == ZSTD_split ? -+ size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ? - ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) : - ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); - #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); -- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); -+ ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); - #endif - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - -@@ -1760,17 +1822,15 @@ ZSTD_decompressSequencesLong_body( - op += oneSeqSize; - } - } -- RETURN_ERROR_IF(seqNblitBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) -- { -+ if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) { - const size_t leftoverLit = dctx->litBufferEnd - litPtr; -- if (leftoverLit) -- { -+ if (leftoverLit) { - RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); - ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); - sequence->litLength -= leftoverLit; -@@ -1779,11 +1839,10 @@ ZSTD_decompressSequencesLong_body( - litPtr = dctx->litExtraBuffer; - litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; - dctx->litBufferLocation = ZSTD_not_in_dst; -- { -- size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); -+ { size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); - #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); -- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); -+ ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); - #endif - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - op += oneSeqSize; -@@ -1796,7 +1855,7 @@ ZSTD_decompressSequencesLong_body( - ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); - #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); -- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); -+ ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); - #endif - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - op += oneSeqSize; -@@ -1808,8 +1867,7 @@ ZSTD_decompressSequencesLong_body( - } - - /* last literal segment */ -- if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */ -- { -+ if (dctx->litBufferLocation == ZSTD_split) { /* first deplete literal buffer in dst, then copy litExtraBuffer */ - size_t const lastLLSize = litBufferEnd - litPtr; - RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), 
dstSize_tooSmall, ""); - if (op != NULL) { -@@ -1827,17 +1885,16 @@ ZSTD_decompressSequencesLong_body( - } - } - -- return op-ostart; -+ return (size_t)(op - ostart); - } - - static size_t - ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { -- return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ - -@@ -1851,20 +1908,18 @@ DONT_VECTORIZE - ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { -- return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - static BMI2_TARGET_ATTRIBUTE size_t - DONT_VECTORIZE - ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { -- return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ - -@@ -1873,50 +1928,40 @@ static BMI2_TARGET_ATTRIBUTE size_t - ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { -- return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ - - #endif /* DYNAMIC_BMI2 */ - --typedef size_t (*ZSTD_decompressSequences_t)( -- ZSTD_DCtx* dctx, -- void* dst, size_t maxDstSize, -- const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame); -- - #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG - static size_t - ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { - DEBUGLOG(5, "ZSTD_decompressSequences"); - #if DYNAMIC_BMI2 - if (ZSTD_DCtx_get_bmi2(dctx)) { -- return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - #endif -- return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - static size_t - 
ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { - DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer"); - #if DYNAMIC_BMI2 - if (ZSTD_DCtx_get_bmi2(dctx)) { -- return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - #endif -- return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ - -@@ -1931,69 +1976,114 @@ static size_t - ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { - DEBUGLOG(5, "ZSTD_decompressSequencesLong"); - #if DYNAMIC_BMI2 - if (ZSTD_DCtx_get_bmi2(dctx)) { -- return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - #endif -- return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ - - -+/* -+ * @returns The total size of the history referenceable by zstd, including -+ * both the prefix and the extDict. At @p op any offset larger than this -+ * is invalid. -+ */ -+static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart) -+{ -+ return (size_t)(op - virtualStart); -+} - --#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ -- !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) --/* ZSTD_getLongOffsetsShare() : -+typedef struct { -+ unsigned longOffsetShare; -+ unsigned maxNbAdditionalBits; -+} ZSTD_OffsetInfo; -+ -+/* ZSTD_getOffsetInfo() : - * condition : offTable must be valid - * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) -- * compared to maximum possible of (1< 22) total += 1; -+ * compared to maximum possible of (1< 22) info.longOffsetShare += 1; -+ } -+ -+ assert(tableLog <= OffFSELog); -+ info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */ - } - -- assert(tableLog <= OffFSELog); -- total <<= (OffFSELog - tableLog); /* scale to OffFSELog */ -+ return info; -+} - -- return total; -+/* -+ * @returns The maximum offset we can decode in one read of our bitstream, without -+ * reloading more bits in the middle of the offset bits read. Any offsets larger -+ * than this must use the long offset decoder. -+ */ -+static size_t ZSTD_maxShortOffset(void) -+{ -+ if (MEM_64bits()) { -+ /* We can decode any offset without reloading bits. -+ * This might change if the max window size grows. -+ */ -+ ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); -+ return (size_t)-1; -+ } else { -+ /* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1. -+ * This offBase would require STREAM_ACCUMULATOR_MIN extra bits. 
-+ * Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset. -+ */ -+ size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1; -+ size_t const maxOffset = maxOffbase - ZSTD_REP_NUM; -+ assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN); -+ return maxOffset; -+ } - } --#endif - - size_t - ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, -- const void* src, size_t srcSize, const int frame, const streaming_operation streaming) -+ const void* src, size_t srcSize, const streaming_operation streaming) - { /* blockType == blockCompressed */ - const BYTE* ip = (const BYTE*)src; -- /* isLongOffset must be true if there are long offsets. -- * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN. -- * We don't expect that to be the case in 64-bit mode. -- * In block mode, window size is not known, so we have to be conservative. -- * (note: but it could be evaluated from current-lowLimit) -- */ -- ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); -- DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); -+ DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize); - -- RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); -+ /* Note : the wording of the specification -+ * allows compressed block to be sized exactly ZSTD_blockSizeMax(dctx). -+ * This generally does not happen, as it makes little sense, -+ * since an uncompressed block would feature same size and have no decompression cost. -+ * Also, note that decoder from reference libzstd before < v1.5.4 -+ * would consider this edge case as an error. -+ * As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx) -+ * for broader compatibility with the deployed ecosystem of zstd decoders */ -+ RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, ""); - - /* Decode literals section */ - { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming); -- DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); -+ DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize); - if (ZSTD_isError(litCSize)) return litCSize; - ip += litCSize; - srcSize -= litCSize; -@@ -2001,6 +2091,23 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* - - /* Build Decoding Tables */ - { -+ /* Compute the maximum block size, which must also work when !frame and fParams are unset. -+ * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t. -+ */ -+ size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx)); -+ size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart); -+ /* isLongOffset must be true if there are long offsets. -+ * Offsets are long if they are larger than ZSTD_maxShortOffset(). -+ * We don't expect that to be the case in 64-bit mode. -+ * -+ * We check here to see if our history is large enough to allow long offsets. -+ * If it isn't, then we can't possible have (valid) long offsets. If the offset -+ * is invalid, then it is okay to read it incorrectly. -+ * -+ * If isLongOffsets is true, then we will later check our decoding table to see -+ * if it is even possible to generate long offsets. 
-+ */ -+ ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset())); - /* These macros control at build-time which decompressor implementation - * we use. If neither is defined, we do some inspection and dispatch at - * runtime. -@@ -2008,6 +2115,11 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* - #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) - int usePrefetchDecoder = dctx->ddictIsCold; -+#else -+ /* Set to 1 to avoid computing offset info if we don't need to. -+ * Otherwise this value is ignored. -+ */ -+ int usePrefetchDecoder = 1; - #endif - int nbSeq; - size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); -@@ -2015,40 +2127,55 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* - ip += seqHSize; - srcSize -= seqHSize; - -- RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); -- --#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ -- !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) -- if ( !usePrefetchDecoder -- && (!frame || (dctx->fParams.windowSize > (1<<24))) -- && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ -- U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); -- U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ -- usePrefetchDecoder = (shareLongOffsets >= minShare); -+ RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); -+ RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall, -+ "invalid dst"); -+ -+ /* If we could potentially have long offsets, or we might want to use the prefetch decoder, -+ * compute information about the share of long offsets, and the maximum nbAdditionalBits. -+ * NOTE: could probably use a larger nbSeq limit -+ */ -+ if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) { -+ ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq); -+ if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) { -+ /* If isLongOffset, but the maximum number of additional bits that we see in our table is small -+ * enough, then we know it is impossible to have too long an offset in this block, so we can -+ * use the regular offset decoder. -+ */ -+ isLongOffset = ZSTD_lo_isRegularOffset; -+ } -+ if (!usePrefetchDecoder) { -+ U32 const minShare = MEM_64bits() ? 
7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ -+ usePrefetchDecoder = (info.longOffsetShare >= minShare); -+ } - } --#endif - - dctx->ddictIsCold = 0; - - #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) -- if (usePrefetchDecoder) -+ if (usePrefetchDecoder) { -+#else -+ (void)usePrefetchDecoder; -+ { - #endif - #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -- return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); - #endif -+ } - - #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG - /* else */ - if (dctx->litBufferLocation == ZSTD_split) -- return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); - else -- return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); - #endif - } - } - - -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize) - { - if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */ -@@ -2060,13 +2187,24 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dct - } - - --size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, -- void* dst, size_t dstCapacity, -- const void* src, size_t srcSize) -+size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx, -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize) - { - size_t dSize; -+ dctx->isFrameDecompression = 0; - ZSTD_checkContinuity(dctx, dst, dstCapacity); -- dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming); -+ dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming); -+ FORWARD_IF_ERROR(dSize, ""); - dctx->previousDstEnd = (char*)dst + dSize; - return dSize; - } -+ -+ -+/* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */ -+size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize) -+{ -+ return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize); -+} ---- a/lib/zstd/decompress/zstd_decompress_block.h -+++ b/lib/zstd/decompress/zstd_decompress_block.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -47,7 +48,7 @@ typedef enum { - */ - size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, -- const void* src, size_t srcSize, const int frame, const streaming_operation streaming); -+ const void* src, size_t srcSize, const streaming_operation streaming); - - /* ZSTD_buildFSETable() : - * generate FSE decoding table for one symbol (ll, ml or off) -@@ -64,5 +65,10 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* - unsigned tableLog, void* wksp, size_t wkspSize, - int bmi2); - -+/* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. 
*/ -+size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx, -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize); -+ - - #endif /* ZSTD_DEC_BLOCK_H */ ---- a/lib/zstd/decompress/zstd_decompress_internal.h -+++ b/lib/zstd/decompress/zstd_decompress_internal.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -75,12 +76,13 @@ static UNUSED_ATTR const U32 ML_base[Max - - #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) - #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) -+#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12 - - typedef struct { - ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ - ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ - ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ -- HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ -+ HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */ - U32 rep[ZSTD_REP_NUM]; - U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; - } ZSTD_entropyDTables_t; -@@ -135,7 +137,7 @@ struct ZSTD_DCtx_s - const void* virtualStart; /* virtual start of previous segment if it was just before current one */ - const void* dictEnd; /* end of previous segment */ - size_t expected; -- ZSTD_frameHeader fParams; -+ ZSTD_FrameHeader fParams; - U64 processedCSize; - U64 decodedSize; - blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ -@@ -152,7 +154,8 @@ struct ZSTD_DCtx_s - size_t litSize; - size_t rleSize; - size_t staticSize; --#if DYNAMIC_BMI2 != 0 -+ int isFrameDecompression; -+#if DYNAMIC_BMI2 - int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ - #endif - -@@ -164,6 +167,8 @@ struct ZSTD_DCtx_s - ZSTD_dictUses_e dictUses; - ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ - ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */ -+ int disableHufAsm; -+ int maxBlockSizeParam; - - /* streaming */ - ZSTD_dStreamStage streamStage; -@@ -199,11 +204,11 @@ struct ZSTD_DCtx_s - }; /* typedef'd to ZSTD_DCtx within "zstd.h" */ - - MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) { --#if DYNAMIC_BMI2 != 0 -- return dctx->bmi2; -+#if DYNAMIC_BMI2 -+ return dctx->bmi2; - #else - (void)dctx; -- return 0; -+ return 0; - #endif - } - ---- a/lib/zstd/decompress_sources.h -+++ b/lib/zstd/decompress_sources.h -@@ -1,6 +1,6 @@ - /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the ---- a/lib/zstd/zstd_common_module.c -+++ b/lib/zstd/zstd_common_module.c -@@ -1,6 +1,6 @@ - // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -24,9 +24,6 @@ EXPORT_SYMBOL_GPL(HUF_readStats_wksp); - EXPORT_SYMBOL_GPL(ZSTD_isError); - EXPORT_SYMBOL_GPL(ZSTD_getErrorName); - EXPORT_SYMBOL_GPL(ZSTD_getErrorCode); --EXPORT_SYMBOL_GPL(ZSTD_customMalloc); --EXPORT_SYMBOL_GPL(ZSTD_customCalloc); --EXPORT_SYMBOL_GPL(ZSTD_customFree); - - MODULE_LICENSE("Dual BSD/GPL"); - MODULE_DESCRIPTION("Zstd Common"); ---- a/lib/zstd/zstd_compress_module.c -+++ b/lib/zstd/zstd_compress_module.c -@@ -1,6 +1,6 @@ - // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -16,6 +16,7 @@ - - #include "common/zstd_deps.h" - #include "common/zstd_internal.h" -+#include "compress/zstd_compress_internal.h" - - #define ZSTD_FORWARD_IF_ERR(ret) \ - do { \ -@@ -85,6 +86,12 @@ zstd_parameters zstd_get_params(int leve - } - EXPORT_SYMBOL(zstd_get_params); - -+size_t zstd_cctx_set_param(zstd_cctx *cctx, ZSTD_cParameter param, int value) -+{ -+ return ZSTD_CCtx_setParameter(cctx, param, value); -+} -+EXPORT_SYMBOL(zstd_cctx_set_param); -+ - zstd_compression_parameters zstd_get_cparams(int level, - unsigned long long estimated_src_size, size_t dict_size) - { -@@ -98,6 +105,52 @@ size_t zstd_cctx_workspace_bound(const z - } - EXPORT_SYMBOL(zstd_cctx_workspace_bound); - -+// Used by zstd_cctx_workspace_bound_with_ext_seq_prod() -+static size_t dummy_external_sequence_producer( -+ void *sequenceProducerState, -+ ZSTD_Sequence *outSeqs, size_t outSeqsCapacity, -+ const void *src, size_t srcSize, -+ const void *dict, size_t dictSize, -+ int compressionLevel, -+ size_t windowSize) -+{ -+ (void)sequenceProducerState; -+ (void)outSeqs; (void)outSeqsCapacity; -+ (void)src; (void)srcSize; -+ (void)dict; (void)dictSize; -+ (void)compressionLevel; -+ (void)windowSize; -+ return ZSTD_SEQUENCE_PRODUCER_ERROR; -+} -+ -+static void init_cctx_params_from_compress_params( -+ ZSTD_CCtx_params *cctx_params, -+ const zstd_compression_parameters *compress_params) -+{ -+ ZSTD_parameters zstd_params; -+ memset(&zstd_params, 0, sizeof(zstd_params)); -+ zstd_params.cParams = *compress_params; -+ ZSTD_CCtxParams_init_advanced(cctx_params, zstd_params); -+} -+ -+size_t zstd_cctx_workspace_bound_with_ext_seq_prod(const zstd_compression_parameters *compress_params) -+{ -+ ZSTD_CCtx_params cctx_params; -+ init_cctx_params_from_compress_params(&cctx_params, compress_params); -+ ZSTD_CCtxParams_registerSequenceProducer(&cctx_params, NULL, dummy_external_sequence_producer); -+ return ZSTD_estimateCCtxSize_usingCCtxParams(&cctx_params); -+} -+EXPORT_SYMBOL(zstd_cctx_workspace_bound_with_ext_seq_prod); -+ -+size_t zstd_cstream_workspace_bound_with_ext_seq_prod(const zstd_compression_parameters *compress_params) -+{ -+ ZSTD_CCtx_params cctx_params; -+ init_cctx_params_from_compress_params(&cctx_params, compress_params); -+ ZSTD_CCtxParams_registerSequenceProducer(&cctx_params, NULL, dummy_external_sequence_producer); -+ return 
ZSTD_estimateCStreamSize_usingCCtxParams(&cctx_params); -+} -+EXPORT_SYMBOL(zstd_cstream_workspace_bound_with_ext_seq_prod); -+ - zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size) - { - if (workspace == NULL) -@@ -209,5 +262,25 @@ size_t zstd_end_stream(zstd_cstream *cst - } - EXPORT_SYMBOL(zstd_end_stream); - -+void zstd_register_sequence_producer( -+ zstd_cctx *cctx, -+ void* sequence_producer_state, -+ zstd_sequence_producer_f sequence_producer -+) { -+ ZSTD_registerSequenceProducer(cctx, sequence_producer_state, sequence_producer); -+} -+EXPORT_SYMBOL(zstd_register_sequence_producer); -+ -+size_t zstd_compress_sequences_and_literals(zstd_cctx *cctx, void* dst, size_t dst_capacity, -+ const zstd_sequence *in_seqs, size_t in_seqs_size, -+ const void* literals, size_t lit_size, size_t lit_capacity, -+ size_t decompressed_size) -+{ -+ return ZSTD_compressSequencesAndLiterals(cctx, dst, dst_capacity, in_seqs, -+ in_seqs_size, literals, lit_size, -+ lit_capacity, decompressed_size); -+} -+EXPORT_SYMBOL(zstd_compress_sequences_and_literals); -+ - MODULE_LICENSE("Dual BSD/GPL"); - MODULE_DESCRIPTION("Zstd Compressor"); ---- a/lib/zstd/zstd_decompress_module.c -+++ b/lib/zstd/zstd_decompress_module.c -@@ -1,6 +1,6 @@ - // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -113,7 +113,7 @@ EXPORT_SYMBOL(zstd_init_dstream); - - size_t zstd_reset_dstream(zstd_dstream *dstream) - { -- return ZSTD_resetDStream(dstream); -+ return ZSTD_DCtx_reset(dstream, ZSTD_reset_session_only); - } - EXPORT_SYMBOL(zstd_reset_dstream); - diff --git a/debian/patches/patchset-pf/zstd/0002-lib-zstd-Refactor-intentional-wrap-around-test.patch b/debian/patches/patchset-pf/zstd/0002-lib-zstd-Refactor-intentional-wrap-around-test.patch deleted file mode 100644 index 7e6aba5..0000000 --- a/debian/patches/patchset-pf/zstd/0002-lib-zstd-Refactor-intentional-wrap-around-test.patch +++ /dev/null @@ -1,58 +0,0 @@ -From fa7962f32acca3bafd81520b3e1f8f24dbee4758 Mon Sep 17 00:00:00 2001 -From: Kees Cook -Date: Mon, 22 Jan 2024 16:27:56 -0800 -Subject: lib: zstd: Refactor intentional wrap-around test - -In an effort to separate intentional arithmetic wrap-around from -unexpected wrap-around, we need to refactor places that depend on this -kind of math. One of the most common code patterns of this is: - - VAR + value < VAR - -Notably, this is considered "undefined behavior" for signed and pointer -types, which the kernel works around by using the -fno-strict-overflow -option in the build[1] (which used to just be -fwrapv). Regardless, we -want to get the kernel source to the position where we can meaningfully -instrument arithmetic wrap-around conditions and catch them when they -are unexpected, regardless of whether they are signed[2], unsigned[3], -or pointer[4] types. - -Switch to a more regular type for a 64-bit value and refactor the -open-coded wrap-around addition test to use subtraction from the type max -(since add_would_overflow() may not be defined in early boot code). This -paves the way to enabling the wrap-around sanitizers in the future. 
- -Link: https://git.kernel.org/linus/68df3755e383e6fecf2354a67b08f92f18536594 [1] -Link: https://github.com/KSPP/linux/issues/26 [2] -Link: https://github.com/KSPP/linux/issues/27 [3] -Link: https://github.com/KSPP/linux/issues/344 [4] -Cc: Nick Terrell -Cc: Paul Jones -Cc: Sedat Dilek -Cc: Oleksandr Natalenko -Cc: Xin Gao -Signed-off-by: Kees Cook ---- - lib/zstd/decompress/zstd_decompress.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/lib/zstd/decompress/zstd_decompress.c -+++ b/lib/zstd/decompress/zstd_decompress.c -@@ -620,7 +620,7 @@ size_t ZSTD_readSkippableFrame(void* dst - * @return : decompressed size of the frames contained */ - unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) - { -- unsigned long long totalDstSize = 0; -+ U64 totalDstSize = 0; - - while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) { - U32 const magicNumber = MEM_readLE32(src); -@@ -638,7 +638,7 @@ unsigned long long ZSTD_findDecompressed - { unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize); - if (fcs >= ZSTD_CONTENTSIZE_ERROR) return fcs; - -- if (totalDstSize + fcs < totalDstSize) -+ if (U64_MAX - totalDstSize < fcs) - return ZSTD_CONTENTSIZE_ERROR; /* check for overflow */ - totalDstSize += fcs; - } diff --git a/debian/patches/patchset-xanmod/binder/0001-binder-turn-into-module.patch b/debian/patches/patchset-xanmod/binder/0001-binder-turn-into-module.patch index 5f121ed..8759d86 100644 --- a/debian/patches/patchset-xanmod/binder/0001-binder-turn-into-module.patch +++ b/debian/patches/patchset-xanmod/binder/0001-binder-turn-into-module.patch @@ -1,4 +1,4 @@ -From ae8cebfd2446a0564c849adcd771ce538855b6b2 Mon Sep 17 00:00:00 2001 +From 8d80309c1a14b7b9f7fac80b68bd01956c1218d1 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 16 Jan 2019 23:13:25 +0100 Subject: binder: turn into module @@ -91,15 +91,15 @@ Signed-off-by: Alexandre Frade static inline void binder_selftest_alloc(struct binder_alloc *alloc) {} --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h -@@ -5,6 +5,7 @@ +@@ -4,6 +4,7 @@ + #define _LINUX_BINDER_INTERNAL_H - #include #include +#include #include #include #include -@@ -77,7 +78,7 @@ extern const struct file_operations bind +@@ -76,7 +77,7 @@ extern const struct file_operations bind extern char *binder_devices_param; @@ -108,7 +108,7 @@ Signed-off-by: Alexandre Frade extern bool is_binderfs_device(const struct inode *inode); extern struct dentry *binderfs_create_file(struct dentry *dir, const char *name, const struct file_operations *fops, -@@ -98,7 +99,7 @@ static inline struct dentry *binderfs_cr +@@ -97,7 +98,7 @@ static inline struct dentry *binderfs_cr static inline void binderfs_remove_file(struct dentry *dentry) {} #endif diff --git a/debian/patches/patchset-xanmod/clearlinux/0001-sched-wait-Do-accept-in-LIFO-order-for-cache-efficie.patch b/debian/patches/patchset-xanmod/clearlinux/0001-sched-wait-Do-accept-in-LIFO-order-for-cache-efficie.patch index 1015e19..bdce520 100644 --- a/debian/patches/patchset-xanmod/clearlinux/0001-sched-wait-Do-accept-in-LIFO-order-for-cache-efficie.patch +++ b/debian/patches/patchset-xanmod/clearlinux/0001-sched-wait-Do-accept-in-LIFO-order-for-cache-efficie.patch @@ -1,7 +1,7 @@ -From fa6cddbfd7915ed81dcbed99f9e5b5a9267d80a3 Mon Sep 17 00:00:00 2001 +From dc1feaaf2b7903976763eeedcc1387602c13f348 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 13 Dec 2018 01:00:49 +0000 -Subject: sched/wait: Do accept() in LIFO order for cache 
+Subject: [PATCH 1/4] sched/wait: Do accept() in LIFO order for cache efficiency Signed-off-by: Alexandre Frade @@ -21,7 +21,7 @@ Signed-off-by: Alexandre Frade extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); -@@ -1192,6 +1193,7 @@ do { \ +@@ -1195,6 +1196,7 @@ do { \ */ void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); @@ -71,7 +71,7 @@ Signed-off-by: Alexandre Frade wq_entry->flags = flags; --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c -@@ -634,7 +634,7 @@ static int inet_csk_wait_for_connect(str +@@ -632,7 +632,7 @@ static int inet_csk_wait_for_connect(str * having to remove and re-insert us on the wait queue. */ for (;;) { diff --git a/debian/patches/patchset-xanmod/clearlinux/0002-firmware-Enable-stateless-firmware-loading.patch b/debian/patches/patchset-xanmod/clearlinux/0002-firmware-Enable-stateless-firmware-loading.patch index 1085f44..a405b91 100644 --- a/debian/patches/patchset-xanmod/clearlinux/0002-firmware-Enable-stateless-firmware-loading.patch +++ b/debian/patches/patchset-xanmod/clearlinux/0002-firmware-Enable-stateless-firmware-loading.patch @@ -1,7 +1,7 @@ -From b837910f5e9f1928872e600a6835be6d422b761b Mon Sep 17 00:00:00 2001 +From 64705d09ddb58674e672b5d300a9ba29388b993f Mon Sep 17 00:00:00 2001 From: William Douglas Date: Wed, 20 Jun 2018 17:23:21 +0000 -Subject: firmware: Enable stateless firmware loading +Subject: [PATCH 2/4] firmware: Enable stateless firmware loading Prefer the order of specific version before generic and /etc before /lib to enable the user to give specific overrides for generic diff --git a/debian/patches/patchset-xanmod/clearlinux/0003-locking-rwsem-spin-faster.patch b/debian/patches/patchset-xanmod/clearlinux/0003-locking-rwsem-spin-faster.patch index 43d792a..11e284b 100644 --- a/debian/patches/patchset-xanmod/clearlinux/0003-locking-rwsem-spin-faster.patch +++ b/debian/patches/patchset-xanmod/clearlinux/0003-locking-rwsem-spin-faster.patch @@ -1,7 +1,7 @@ -From 274ba9c23b6fe3212c7f02f3e833086427034705 Mon Sep 17 00:00:00 2001 +From 922ad665f65ced17b949f2a687da59693fbea60f Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 18 Feb 2018 23:35:41 +0000 -Subject: locking: rwsem: spin faster +Subject: [PATCH 3/4] locking: rwsem: spin faster tweak rwsem owner spinning a bit diff --git a/debian/patches/patchset-xanmod/clearlinux/0004-drivers-initialize-ata-before-graphics.patch b/debian/patches/patchset-xanmod/clearlinux/0004-drivers-initialize-ata-before-graphics.patch index db05acc..3253f27 100644 --- a/debian/patches/patchset-xanmod/clearlinux/0004-drivers-initialize-ata-before-graphics.patch +++ b/debian/patches/patchset-xanmod/clearlinux/0004-drivers-initialize-ata-before-graphics.patch @@ -1,7 +1,7 @@ -From 0234467781c5b1c50f71f3936571e4ea3e77c279 Mon Sep 17 00:00:00 2001 +From 45eb20726d383c7003579405b52ceb13c6ce4b65 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 2 Jun 2016 23:36:32 -0500 -Subject: drivers: initialize ata before graphics +Subject: [PATCH 4/4] drivers: initialize ata before graphics ATA init is the long pole in the boot process, and its asynchronous. 
move the graphics init after it so that ata and graphics initialize diff --git a/debian/patches/patchset-xanmod/net/netfilter/0001-netfilter-Add-netfilter-nf_tables-fullcone-support.patch b/debian/patches/patchset-xanmod/net/netfilter/0001-netfilter-Add-netfilter-nf_tables-fullcone-support.patch index 7aa30cf..b972621 100644 --- a/debian/patches/patchset-xanmod/net/netfilter/0001-netfilter-Add-netfilter-nf_tables-fullcone-support.patch +++ b/debian/patches/patchset-xanmod/net/netfilter/0001-netfilter-Add-netfilter-nf_tables-fullcone-support.patch @@ -1,7 +1,7 @@ -From 2099f9c57216c836e445d2f6ba65f04131267f47 Mon Sep 17 00:00:00 2001 +From a78b8dee94d4742c4696c55c0eec964802e812ac Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Mon, 27 Feb 2023 01:38:18 +0000 -Subject: netfilter: Add netfilter nf_tables fullcone support +Subject: [PATCH 1/2] netfilter: Add netfilter nf_tables fullcone support Signed-off-by: Syrone Wong Signed-off-by: Alexandre Frade @@ -10,8 +10,8 @@ Signed-off-by: Alexandre Frade net/netfilter/Kconfig | 15 + net/netfilter/Makefile | 5 + net/netfilter/nf_nat_fullcone.c | 1604 +++++++++++++++++++++++ - net/netfilter/nft_ext_fullcone.c | 466 +++++++ - 5 files changed, 2246 insertions(+) + net/netfilter/nft_ext_fullcone.c | 470 +++++++ + 5 files changed, 2250 insertions(+) create mode 100644 include/net/netfilter/nf_nat_fullcone.h create mode 100644 net/netfilter/nf_nat_fullcone.c create mode 100644 net/netfilter/nft_ext_fullcone.c diff --git a/debian/patches/patchset-xanmod/net/netfilter/0002-netfilter-add-xt_FLOWOFFLOAD-target.patch b/debian/patches/patchset-xanmod/net/netfilter/0002-netfilter-add-xt_FLOWOFFLOAD-target.patch index 14bd561..901d7bb 100644 --- a/debian/patches/patchset-xanmod/net/netfilter/0002-netfilter-add-xt_FLOWOFFLOAD-target.patch +++ b/debian/patches/patchset-xanmod/net/netfilter/0002-netfilter-add-xt_FLOWOFFLOAD-target.patch @@ -1,7 +1,7 @@ -From 6fbfabdc4e5ef8a186c27e4ed2db28ee1ddf4b4e Mon Sep 17 00:00:00 2001 +From 242e385bcd49ee7ea5332b27864f81aab9b11718 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 20 Feb 2018 15:56:02 +0100 -Subject: netfilter: add xt_FLOWOFFLOAD target +Subject: [PATCH 2/2] netfilter: add xt_FLOWOFFLOAD target Signed-off-by: Felix Fietkau Signed-off-by: Alexandre Frade diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0001-net-tcp_bbr-broaden-app-limited-rate-sample-detectio.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0001-net-tcp_bbr-broaden-app-limited-rate-sample-detectio.patch index 4329bb4..3fc123c 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0001-net-tcp_bbr-broaden-app-limited-rate-sample-detectio.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0001-net-tcp_bbr-broaden-app-limited-rate-sample-detectio.patch @@ -1,7 +1,7 @@ -From 5435b92688a57d175607374d5bbff357e4ba3e71 Mon Sep 17 00:00:00 2001 +From 1e164adec73236b05d5b84846a460082d3d211d2 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Tue, 11 Jun 2019 12:26:55 -0400 -Subject: net-tcp_bbr: broaden app-limited rate sample detection +Subject: [PATCH 01/18] net-tcp_bbr: broaden app-limited rate sample detection This commit is a bug fix for the Linux TCP app-limited (application-limited) logic that is used for collecting rate @@ -32,7 +32,7 @@ Signed-off-by: Alexandre Frade --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c -@@ -3983,6 +3983,7 @@ static int tcp_ack(struct sock *sk, cons +@@ -3994,6 +3994,7 @@ static int tcp_ack(struct sock *sk, cons prior_fack = tcp_is_sack(tp) ? 
tcp_highest_sack_seq(tp) : tp->snd_una; rs.prior_in_flight = tcp_packets_in_flight(tp); @@ -42,10 +42,10 @@ Signed-off-by: Alexandre Frade * is in window. --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c -@@ -699,6 +699,7 @@ void tcp_write_timer_handler(struct sock +@@ -702,6 +702,7 @@ void tcp_write_timer_handler(struct sock + icsk_timeout(icsk)); return; } - + tcp_rate_check_app_limited(sk); tcp_mstamp_refresh(tcp_sk(sk)); event = icsk->icsk_pending; diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0002-net-tcp_bbr-v2-shrink-delivered_mstamp-first_tx_msta.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0002-net-tcp_bbr-v2-shrink-delivered_mstamp-first_tx_msta.patch index 0dfd1ba..d99295a 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0002-net-tcp_bbr-v2-shrink-delivered_mstamp-first_tx_msta.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0002-net-tcp_bbr-v2-shrink-delivered_mstamp-first_tx_msta.patch @@ -1,7 +1,7 @@ -From 9aa33a35b5b9cbe65c87e6f9438e69ede143d11a Mon Sep 17 00:00:00 2001 +From e02830baf6c32aceea6c0dfe8a0a4b96db171418 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sun, 24 Jun 2018 21:55:59 -0400 -Subject: net-tcp_bbr: v2: shrink delivered_mstamp, +Subject: [PATCH 02/18] net-tcp_bbr: v2: shrink delivered_mstamp, first_tx_mstamp to u32 to free up 8 bytes Free up some space for tracking inflight and losses for each @@ -25,7 +25,7 @@ Signed-off-by: Alexandre Frade --- a/include/net/tcp.h +++ b/include/net/tcp.h -@@ -901,6 +901,11 @@ static inline u32 tcp_stamp_us_delta(u64 +@@ -945,6 +945,11 @@ static inline u32 tcp_stamp_us_delta(u64 return max_t(s64, t1 - t0, 0); } @@ -37,7 +37,7 @@ Signed-off-by: Alexandre Frade /* provide the departure time in us unit */ static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb) { -@@ -990,9 +995,9 @@ struct tcp_skb_cb { +@@ -1043,9 +1048,9 @@ struct tcp_skb_cb { /* pkts S/ACKed so far upon tx of skb, incl retrans: */ __u32 delivered; /* start of send pipeline phase */ diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0003-net-tcp_bbr-v2-snapshot-packets-in-flight-at-transmi.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0003-net-tcp_bbr-v2-snapshot-packets-in-flight-at-transmi.patch index 69bae5a..de775da 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0003-net-tcp_bbr-v2-snapshot-packets-in-flight-at-transmi.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0003-net-tcp_bbr-v2-snapshot-packets-in-flight-at-transmi.patch @@ -1,7 +1,7 @@ -From 63e1d064c4e4355293b9ee7014f4559cdeba4b8b Mon Sep 17 00:00:00 2001 +From b39e9e473ed0446e4268cffb09f4a260cea7c341 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sat, 5 Aug 2017 11:49:50 -0400 -Subject: net-tcp_bbr: v2: snapshot packets in flight at transmit +Subject: [PATCH 03/18] net-tcp_bbr: v2: snapshot packets in flight at transmit time and pass in rate_sample CC algorithms may want to snapshot the number of packets in flight at @@ -27,7 +27,7 @@ Signed-off-by: Alexandre Frade --- a/include/net/tcp.h +++ b/include/net/tcp.h -@@ -998,6 +998,10 @@ struct tcp_skb_cb { +@@ -1051,6 +1051,10 @@ struct tcp_skb_cb { u32 first_tx_mstamp; /* when we reached the "delivered" count */ u32 delivered_mstamp; @@ -38,7 +38,7 @@ Signed-off-by: Alexandre Frade } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; -@@ -1154,6 +1158,7 @@ struct rate_sample { +@@ -1207,6 +1211,7 @@ struct rate_sample { u64 prior_mstamp; /* starting timestamp for interval */ u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ u32 
prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */ @@ -46,7 +46,7 @@ Signed-off-by: Alexandre Frade s32 delivered; /* number of packets delivered over interval */ s32 delivered_ce; /* number of packets delivered w/ CE marks*/ long interval_us; /* time for tp->delivered to incr "delivered" */ -@@ -1276,6 +1281,7 @@ static inline void tcp_ca_event(struct s +@@ -1329,6 +1334,7 @@ static inline void tcp_ca_event(struct s void tcp_set_ca_state(struct sock *sk, const u8 ca_state); /* From tcp_rate.c */ @@ -56,7 +56,7 @@ Signed-off-by: Alexandre Frade struct rate_sample *rs); --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c -@@ -2776,6 +2776,7 @@ static bool tcp_write_xmit(struct sock * +@@ -2777,6 +2777,7 @@ static bool tcp_write_xmit(struct sock * skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC); list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); tcp_init_tso_segs(skb, mss_now); diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0004-net-tcp_bbr-v2-count-packets-lost-over-TCP-rate-samp.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0004-net-tcp_bbr-v2-count-packets-lost-over-TCP-rate-samp.patch index 9fe6d7d..7a6b88e 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0004-net-tcp_bbr-v2-count-packets-lost-over-TCP-rate-samp.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0004-net-tcp_bbr-v2-count-packets-lost-over-TCP-rate-samp.patch @@ -1,7 +1,7 @@ -From 4022fb6da58dd67760dc8f3351067945a377df91 Mon Sep 17 00:00:00 2001 +From e4c82a08a05eeee9341511f35e922914520dd401 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 12 Oct 2017 23:44:27 -0400 -Subject: net-tcp_bbr: v2: count packets lost over TCP rate +Subject: [PATCH 04/18] net-tcp_bbr: v2: count packets lost over TCP rate sampling interval For understanding the relationship between inflight and packet loss @@ -19,7 +19,7 @@ Signed-off-by: Alexandre Frade --- a/include/net/tcp.h +++ b/include/net/tcp.h -@@ -1002,6 +1002,7 @@ struct tcp_skb_cb { +@@ -1055,6 +1055,7 @@ struct tcp_skb_cb { #define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1) u32 in_flight:20, /* packets in flight at transmit */ unused2:12; @@ -27,7 +27,7 @@ Signed-off-by: Alexandre Frade } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; -@@ -1156,11 +1157,13 @@ struct ack_sample { +@@ -1209,11 +1210,13 @@ struct ack_sample { */ struct rate_sample { u64 prior_mstamp; /* starting timestamp for interval */ diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0005-net-tcp_bbr-v2-export-FLAG_ECE-in-rate_sample.is_ece.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0005-net-tcp_bbr-v2-export-FLAG_ECE-in-rate_sample.is_ece.patch index 7a2a99d..3569dbc 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0005-net-tcp_bbr-v2-export-FLAG_ECE-in-rate_sample.is_ece.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0005-net-tcp_bbr-v2-export-FLAG_ECE-in-rate_sample.is_ece.patch @@ -1,7 +1,7 @@ -From 3ff71ca0a15ebe4e5db9c0089121eafd2efc02ba Mon Sep 17 00:00:00 2001 +From 3d3b54d263b6271926e0d9800ca000a529267cfe Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Nov 2018 13:48:36 -0500 -Subject: net-tcp_bbr: v2: export FLAG_ECE in rate_sample.is_ece +Subject: [PATCH 05/18] net-tcp_bbr: v2: export FLAG_ECE in rate_sample.is_ece For understanding the relationship between inflight and ECN signals, to try to find the highest inflight value that has acceptable levels @@ -18,7 +18,7 @@ Signed-off-by: Alexandre Frade --- a/include/net/tcp.h +++ b/include/net/tcp.h 
-@@ -1175,6 +1175,7 @@ struct rate_sample { +@@ -1228,6 +1228,7 @@ struct rate_sample { bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ bool is_ack_delayed; /* is this (likely) a delayed ACK? */ @@ -28,7 +28,7 @@ Signed-off-by: Alexandre Frade struct tcp_congestion_ops { --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c -@@ -4073,6 +4073,7 @@ static int tcp_ack(struct sock *sk, cons +@@ -4084,6 +4084,7 @@ static int tcp_ack(struct sock *sk, cons delivered = tcp_newly_delivered(sk, delivered, flag); lost = tp->lost - lost; /* freshly marked lost */ rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0006-net-tcp_bbr-v2-introduce-ca_ops-skb_marked_lost-CC-m.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0006-net-tcp_bbr-v2-introduce-ca_ops-skb_marked_lost-CC-m.patch index 8b0c730..33306b6 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0006-net-tcp_bbr-v2-introduce-ca_ops-skb_marked_lost-CC-m.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0006-net-tcp_bbr-v2-introduce-ca_ops-skb_marked_lost-CC-m.patch @@ -1,7 +1,7 @@ -From fa9348cbc2b5a0f1f3fc82e51ae6ce956f8cfb1f Mon Sep 17 00:00:00 2001 +From 6f3ecfa5c713cacd451b876f4190da6d36c512eb Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Tue, 7 Aug 2018 21:52:06 -0400 -Subject: net-tcp_bbr: v2: introduce ca_ops->skb_marked_lost() CC +Subject: [PATCH 06/18] net-tcp_bbr: v2: introduce ca_ops->skb_marked_lost() CC module callback API For connections experiencing reordering, RACK can mark packets lost @@ -30,7 +30,7 @@ Signed-off-by: Alexandre Frade --- a/include/net/tcp.h +++ b/include/net/tcp.h -@@ -1202,6 +1202,9 @@ struct tcp_congestion_ops { +@@ -1255,6 +1255,9 @@ struct tcp_congestion_ops { /* override sysctl_tcp_min_tso_segs */ u32 (*min_tso_segs)(struct sock *sk); @@ -42,7 +42,7 @@ Signed-off-by: Alexandre Frade */ --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c -@@ -1140,7 +1140,12 @@ static void tcp_verify_retransmit_hint(s +@@ -1139,7 +1139,12 @@ static void tcp_verify_retransmit_hint(s */ static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb) { diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0007-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-merge-in.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0007-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-merge-in.patch index c1adf52..2ad901a 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0007-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-merge-in.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0007-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-merge-in.patch @@ -1,7 +1,7 @@ -From 3add8086d7d76fe240fb341a4e49149ac4332990 Mon Sep 17 00:00:00 2001 +From 52b39284416d0d841d4d51db0d3fce1c191a00d7 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 1 May 2019 20:16:33 -0400 -Subject: net-tcp_bbr: v2: adjust skb tx.in_flight upon merge in +Subject: [PATCH 07/18] net-tcp_bbr: v2: adjust skb tx.in_flight upon merge in tcp_shifted_skb() When tcp_shifted_skb() updates state as adjacent SACKed skbs are @@ -39,7 +39,7 @@ Signed-off-by: Alexandre Frade --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c -@@ -1517,6 +1517,17 @@ static bool tcp_shifted_skb(struct sock +@@ -1516,6 +1516,17 @@ static bool tcp_shifted_skb(struct sock WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount); tcp_skb_pcount_add(skb, -pcount); diff --git 
a/debian/patches/patchset-xanmod/net/tcp/bbr3/0008-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-split-in.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0008-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-split-in.patch index 5f17854..73c3824 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0008-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-split-in.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0008-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-split-in.patch @@ -1,7 +1,7 @@ -From 6363d43645b3383ba590d0574dc37a215386aacf Mon Sep 17 00:00:00 2001 +From 64570028a478a2249356345127bb3a3c75509d57 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 1 May 2019 20:16:25 -0400 -Subject: net-tcp_bbr: v2: adjust skb tx.in_flight upon split in +Subject: [PATCH 08/18] net-tcp_bbr: v2: adjust skb tx.in_flight upon split in tcp_fragment() When we fragment an skb that has already been sent, we need to update @@ -31,7 +31,7 @@ Signed-off-by: Alexandre Frade --- a/include/net/tcp.h +++ b/include/net/tcp.h -@@ -1301,6 +1301,21 @@ static inline bool tcp_skb_sent_after(u6 +@@ -1354,6 +1354,21 @@ static inline bool tcp_skb_sent_after(u6 return t1 > t2 || (t1 == t2 && after(seq1, seq2)); } @@ -55,16 +55,16 @@ Signed-off-by: Alexandre Frade * between different flows. --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c -@@ -1612,7 +1612,7 @@ int tcp_fragment(struct sock *sk, enum t +@@ -1614,7 +1614,7 @@ int tcp_fragment(struct sock *sk, enum t { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; - int old_factor; + int old_factor, inflight_prev; long limit; + u16 flags; int nlen; - u8 flags; -@@ -1687,6 +1687,30 @@ int tcp_fragment(struct sock *sk, enum t +@@ -1689,6 +1689,30 @@ int tcp_fragment(struct sock *sk, enum t if (diff) tcp_adjust_pcount(sk, skb, diff); diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0009-net-tcp-add-new-ca-opts-flag-TCP_CONG_WANTS_CE_EVENT.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0009-net-tcp-add-new-ca-opts-flag-TCP_CONG_WANTS_CE_EVENT.patch index 2997ff4..9651fc0 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0009-net-tcp-add-new-ca-opts-flag-TCP_CONG_WANTS_CE_EVENT.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0009-net-tcp-add-new-ca-opts-flag-TCP_CONG_WANTS_CE_EVENT.patch @@ -1,7 +1,7 @@ -From 8c1b5bf6012099cba8911e255487bca5d0a2bd02 Mon Sep 17 00:00:00 2001 +From fb3f903921be91a91b577d3771e5d5c9e9fe3aa9 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Wed, 23 May 2018 17:55:54 -0700 -Subject: net-tcp: add new ca opts flag TCP_CONG_WANTS_CE_EVENTS +Subject: [PATCH 09/18] net-tcp: add new ca opts flag TCP_CONG_WANTS_CE_EVENTS Add a a new ca opts flag TCP_CONG_WANTS_CE_EVENTS that allows a congestion control module to receive CE events. @@ -23,20 +23,20 @@ Signed-off-by: Alexandre Frade --- a/include/net/tcp.h +++ b/include/net/tcp.h -@@ -1137,7 +1137,11 @@ enum tcp_ca_ack_event_flags { - #define TCP_CONG_NON_RESTRICTED 0x1 +@@ -1190,7 +1190,11 @@ enum tcp_ca_ack_event_flags { + #define TCP_CONG_NON_RESTRICTED BIT(0) /* Requires ECN/ECT set on all packets */ - #define TCP_CONG_NEEDS_ECN 0x2 + #define TCP_CONG_NEEDS_ECN BIT(1) -#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN) +/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). 
*/ -+#define TCP_CONG_WANTS_CE_EVENTS 0x4 ++#define TCP_CONG_WANTS_CE_EVENTS BIT(2) +#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \ + TCP_CONG_NEEDS_ECN | \ + TCP_CONG_WANTS_CE_EVENTS) union tcp_cc_info; -@@ -1269,6 +1273,14 @@ static inline char *tcp_ca_get_name_by_k +@@ -1322,6 +1326,14 @@ static inline char *tcp_ca_get_name_by_k } #endif @@ -53,7 +53,7 @@ Signed-off-by: Alexandre Frade const struct inet_connection_sock *icsk = inet_csk(sk); --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c -@@ -376,7 +376,7 @@ static void __tcp_ecn_check_ce(struct so +@@ -381,7 +381,7 @@ static void tcp_data_ecn_check(struct so tcp_enter_quickack_mode(sk, 2); break; case INET_ECN_CE: @@ -62,7 +62,7 @@ Signed-off-by: Alexandre Frade tcp_ca_event(sk, CA_EVENT_ECN_IS_CE); if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { -@@ -387,7 +387,7 @@ static void __tcp_ecn_check_ce(struct so +@@ -392,7 +392,7 @@ static void tcp_data_ecn_check(struct so tp->ecn_flags |= TCP_ECN_SEEN; break; default: diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0010-net-tcp-re-generalize-TSO-sizing-in-TCP-CC-module-AP.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0010-net-tcp-re-generalize-TSO-sizing-in-TCP-CC-module-AP.patch index 139f0f6..bf70dd2 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0010-net-tcp-re-generalize-TSO-sizing-in-TCP-CC-module-AP.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0010-net-tcp-re-generalize-TSO-sizing-in-TCP-CC-module-AP.patch @@ -1,7 +1,7 @@ -From 15fd38de916127d286bd373903fdfa5215b05aa4 Mon Sep 17 00:00:00 2001 +From e0ac041e7e63d138d210fe875120447c11d2d4ba Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 27 Sep 2019 17:10:26 -0400 -Subject: net-tcp: re-generalize TSO sizing in TCP CC module API +Subject: [PATCH 10/18] net-tcp: re-generalize TSO sizing in TCP CC module API Reorganize the API for CC modules so that the CC module once again gets complete control of the TSO sizing decision. 
This is how the API @@ -28,7 +28,7 @@ Signed-off-by: Alexandre Frade --- a/include/net/tcp.h +++ b/include/net/tcp.h -@@ -1203,8 +1203,8 @@ struct tcp_congestion_ops { +@@ -1256,8 +1256,8 @@ struct tcp_congestion_ops { /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); @@ -118,7 +118,7 @@ Signed-off-by: Alexandre Frade }; --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c -@@ -2068,13 +2068,12 @@ static u32 tcp_tso_autosize(const struct +@@ -2069,13 +2069,12 @@ static u32 tcp_tso_autosize(const struct static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0011-net-tcp-add-fast_ack_mode-1-skip-rwin-check-in-tcp_f.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0011-net-tcp-add-fast_ack_mode-1-skip-rwin-check-in-tcp_f.patch index e846bf5..4bc995d 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0011-net-tcp-add-fast_ack_mode-1-skip-rwin-check-in-tcp_f.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0011-net-tcp-add-fast_ack_mode-1-skip-rwin-check-in-tcp_f.patch @@ -1,7 +1,7 @@ -From 344af0ac329b2b1ce5f1ce920166e4aeb5e83037 Mon Sep 17 00:00:00 2001 +From 323945d608bbef5fd3a444f52442bf0154c4ef0b Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sat, 16 Nov 2019 13:16:25 -0500 -Subject: net-tcp: add fast_ack_mode=1: skip rwin check in +Subject: [PATCH 11/18] net-tcp: add fast_ack_mode=1: skip rwin check in tcp_fast_ack_mode__tcp_ack_snd_check() Add logic for an optional TCP connection behavior, enabled with @@ -22,9 +22,9 @@ Signed-off-by: Alexandre Frade --- a/include/linux/tcp.h +++ b/include/linux/tcp.h -@@ -245,7 +245,8 @@ struct tcp_sock { - /* OOO segments go in this rbtree. Socket lock must be held. */ - struct rb_root out_of_order_queue; +@@ -248,7 +248,8 @@ struct tcp_sock { + void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq); + #endif u32 snd_ssthresh; /* Slow start size threshold */ - u8 recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */ + u32 recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ @@ -34,7 +34,7 @@ Signed-off-by: Alexandre Frade /* TX read-write hotpath cache lines */ --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c -@@ -3398,6 +3398,7 @@ int tcp_disconnect(struct sock *sk, int +@@ -3411,6 +3411,7 @@ int tcp_disconnect(struct sock *sk, int tp->rx_opt.dsack = 0; tp->rx_opt.num_sacks = 0; tp->rcv_ooopack = 0; @@ -54,7 +54,7 @@ Signed-off-by: Alexandre Frade if (tcp_ca_needs_ecn(sk)) --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c -@@ -5790,13 +5790,14 @@ static void __tcp_ack_snd_check(struct s +@@ -5800,13 +5800,14 @@ static void __tcp_ack_snd_check(struct s /* More than one full frame received... 
*/ if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0012-net-tcp_bbr-v2-record-app-limited-status-of-TLP-repa.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0012-net-tcp_bbr-v2-record-app-limited-status-of-TLP-repa.patch index 13ef8dd..232b881 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0012-net-tcp_bbr-v2-record-app-limited-status-of-TLP-repa.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0012-net-tcp_bbr-v2-record-app-limited-status-of-TLP-repa.patch @@ -1,7 +1,7 @@ -From 18f564dbe586ab02c48563a9e05e71aa1a421607 Mon Sep 17 00:00:00 2001 +From 30fc364b7834b5dce9434dfab4adb49d4924ac03 Mon Sep 17 00:00:00 2001 From: Jianfeng Wang Date: Fri, 19 Jun 2020 17:33:45 +0000 -Subject: net-tcp_bbr: v2: record app-limited status of +Subject: [PATCH 12/18] net-tcp_bbr: v2: record app-limited status of TLP-repaired flight When sending a TLP retransmit, record whether the outstanding flight @@ -23,7 +23,7 @@ Signed-off-by: Alexandre Frade --- a/include/linux/tcp.h +++ b/include/linux/tcp.h -@@ -303,7 +303,8 @@ struct tcp_sock { +@@ -306,7 +306,8 @@ struct tcp_sock { */ struct tcp_options_received rx_opt; u8 nonagle : 4,/* Disable Nagle algorithm? */ @@ -35,7 +35,7 @@ Signed-off-by: Alexandre Frade /* RX read-write hotpath cache lines */ --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c -@@ -3012,6 +3012,7 @@ void tcp_send_loss_probe(struct sock *sk +@@ -3013,6 +3013,7 @@ void tcp_send_loss_probe(struct sock *sk if (WARN_ON(!skb || !tcp_skb_pcount(skb))) goto rearm_timer; diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0013-net-tcp_bbr-v2-inform-CC-module-of-losses-repaired-b.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0013-net-tcp_bbr-v2-inform-CC-module-of-losses-repaired-b.patch index 53faf4f..8173be7 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0013-net-tcp_bbr-v2-inform-CC-module-of-losses-repaired-b.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0013-net-tcp_bbr-v2-inform-CC-module-of-losses-repaired-b.patch @@ -1,7 +1,7 @@ -From 8da6e7d31a73453ce8495f004951069f5ef67c86 Mon Sep 17 00:00:00 2001 +From 0c90659bb5c4af502726a0c71cecc096ece1fc95 Mon Sep 17 00:00:00 2001 From: Jianfeng Wang Date: Tue, 16 Jun 2020 17:41:19 +0000 -Subject: net-tcp_bbr: v2: inform CC module of losses repaired by +Subject: [PATCH 13/18] net-tcp_bbr: v2: inform CC module of losses repaired by TLP probe Before this commit, when there is a packet loss that creates a sequence @@ -25,7 +25,7 @@ Signed-off-by: Alexandre Frade --- a/include/net/tcp.h +++ b/include/net/tcp.h -@@ -1115,6 +1115,7 @@ enum tcp_ca_event { +@@ -1168,6 +1168,7 @@ enum tcp_ca_event { CA_EVENT_LOSS, /* loss timeout */ CA_EVENT_ECN_NO_CE, /* ECT set, but not CE marked */ CA_EVENT_ECN_IS_CE, /* received CE marked IP packet */ @@ -35,7 +35,7 @@ Signed-off-by: Alexandre Frade /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c -@@ -3870,6 +3870,7 @@ static void tcp_process_tlp_ack(struct s +@@ -3881,6 +3881,7 @@ static void tcp_process_tlp_ack(struct s /* ACK advances: there was a loss, so reduce cwnd. 
Reset * tlp_high_seq in tcp_init_cwnd_reduction() */ diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0014-net-tcp_bbr-v2-introduce-is_acking_tlp_retrans_seq-i.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0014-net-tcp_bbr-v2-introduce-is_acking_tlp_retrans_seq-i.patch index afed86b..33fe186 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0014-net-tcp_bbr-v2-introduce-is_acking_tlp_retrans_seq-i.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0014-net-tcp_bbr-v2-introduce-is_acking_tlp_retrans_seq-i.patch @@ -1,7 +1,7 @@ -From 528d5f9d97954b32db6ae1fe1729c4965886b6df Mon Sep 17 00:00:00 2001 +From 0c9501f0d0743b7ab6958e064760b773b3bdf19b Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 21 Sep 2020 14:46:26 -0400 -Subject: net-tcp_bbr: v2: introduce is_acking_tlp_retrans_seq +Subject: [PATCH 14/18] net-tcp_bbr: v2: introduce is_acking_tlp_retrans_seq into rate_sample Introduce is_acking_tlp_retrans_seq into rate_sample. This bool will @@ -21,7 +21,7 @@ Signed-off-by: Alexandre Frade --- a/include/net/tcp.h +++ b/include/net/tcp.h -@@ -1179,6 +1179,7 @@ struct rate_sample { +@@ -1232,6 +1232,7 @@ struct rate_sample { u32 last_end_seq; /* end_seq of most recently ACKed packet */ bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ @@ -31,7 +31,7 @@ Signed-off-by: Alexandre Frade }; --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c -@@ -3853,7 +3853,8 @@ static void tcp_replace_ts_recent(struct +@@ -3864,7 +3864,8 @@ static int tcp_replace_ts_recent(struct /* This routine deals with acks during a TLP episode and ends an episode by * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack */ @@ -41,7 +41,7 @@ Signed-off-by: Alexandre Frade { struct tcp_sock *tp = tcp_sk(sk); -@@ -3881,6 +3882,11 @@ static void tcp_process_tlp_ack(struct s +@@ -3892,6 +3893,11 @@ static void tcp_process_tlp_ack(struct s FLAG_NOT_DUP | FLAG_DATA_SACKED))) { /* Pure dupack: original and TLP probe arrived; no loss */ tp->tlp_high_seq = 0; @@ -53,7 +53,7 @@ Signed-off-by: Alexandre Frade } } -@@ -4066,7 +4072,7 @@ static int tcp_ack(struct sock *sk, cons +@@ -4077,7 +4083,7 @@ static int tcp_ack(struct sock *sk, cons tcp_in_ack_event(sk, flag); if (tp->tlp_high_seq) @@ -62,7 +62,7 @@ Signed-off-by: Alexandre Frade if (tcp_ack_is_dubious(sk, flag)) { if (!(flag & (FLAG_SND_UNA_ADVANCED | -@@ -4111,7 +4117,7 @@ no_queue: +@@ -4122,7 +4128,7 @@ no_queue: tcp_ack_probe(sk); if (tp->tlp_high_seq) diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0015-tcp-introduce-per-route-feature-RTAX_FEATURE_ECN_LOW.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0015-tcp-introduce-per-route-feature-RTAX_FEATURE_ECN_LOW.patch index 1bc99f1..7feb442 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0015-tcp-introduce-per-route-feature-RTAX_FEATURE_ECN_LOW.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0015-tcp-introduce-per-route-feature-RTAX_FEATURE_ECN_LOW.patch @@ -1,7 +1,7 @@ -From a086cf589b0ab974965d88d338c0a373eff5d67c Mon Sep 17 00:00:00 2001 +From c70e032f0effa66e1f67ae0a5ed65fac83c6c267 Mon Sep 17 00:00:00 2001 From: David Morley Date: Fri, 14 Jul 2023 11:07:56 -0400 -Subject: tcp: introduce per-route feature RTAX_FEATURE_ECN_LOW +Subject: [PATCH 15/18] tcp: introduce per-route feature RTAX_FEATURE_ECN_LOW Define and implement a new per-route feature, RTAX_FEATURE_ECN_LOW. 
@@ -33,15 +33,15 @@ Signed-off-by: Alexandre Frade --- a/include/net/tcp.h +++ b/include/net/tcp.h -@@ -376,6 +376,7 @@ static inline void tcp_dec_quickack_mode - #define TCP_ECN_QUEUE_CWR 2 - #define TCP_ECN_DEMAND_CWR 4 - #define TCP_ECN_SEEN 8 -+#define TCP_ECN_LOW 16 +@@ -379,6 +379,7 @@ static inline void tcp_dec_quickack_mode + #define TCP_ECN_DEMAND_CWR BIT(2) + #define TCP_ECN_SEEN BIT(3) + #define TCP_ECN_MODE_ACCECN BIT(4) ++#define TCP_ECN_LOW BIT(5) - enum tcp_tw_status { - TCP_TW_SUCCESS = 0, -@@ -796,6 +797,15 @@ static inline void tcp_fast_path_check(s + #define TCP_ECN_DISABLED 0 + #define TCP_ECN_MODE_PENDING (TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN) +@@ -840,6 +841,15 @@ static inline void tcp_fast_path_check(s u32 tcp_delack_max(const struct sock *sk); @@ -59,7 +59,7 @@ Signed-off-by: Alexandre Frade { --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h -@@ -516,12 +516,14 @@ enum { +@@ -517,12 +517,14 @@ enum { #define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */ #define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */ #define RTAX_FEATURE_TCP_USEC_TS (1 << 4) @@ -77,7 +77,7 @@ Signed-off-by: Alexandre Frade __u8 proto; --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c -@@ -471,6 +471,8 @@ void tcp_ca_openreq_child(struct sock *s +@@ -472,6 +472,8 @@ void tcp_ca_openreq_child(struct sock *s u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); bool ca_got_dst = false; @@ -101,7 +101,7 @@ Signed-off-by: Alexandre Frade use_ecn = true; } @@ -354,6 +353,9 @@ static void tcp_ecn_send_syn(struct sock - tp->ecn_flags = TCP_ECN_OK; + tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) INET_ECN_xmit(sk); + diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0016-net-tcp_bbr-v3-update-TCP-bbr-congestion-control-mod.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0016-net-tcp_bbr-v3-update-TCP-bbr-congestion-control-mod.patch index 9b58e1f..ac6741a 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0016-net-tcp_bbr-v3-update-TCP-bbr-congestion-control-mod.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0016-net-tcp_bbr-v3-update-TCP-bbr-congestion-control-mod.patch @@ -1,7 +1,7 @@ -From 3259adaa6771b29fdf023acffe469979cdd1caae Mon Sep 17 00:00:00 2001 +From 2e495d77c2494b3ebf22dac97dd5dc025b86ba86 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Tue, 11 Jun 2019 12:54:22 -0400 -Subject: net-tcp_bbr: v3: update TCP "bbr" congestion control +Subject: [PATCH 16/18] net-tcp_bbr: v3: update TCP "bbr" congestion control module to BBRv3 BBR v3 is an enhacement to the BBR v1 algorithm. 
It's designed to aim for lower @@ -135,12 +135,12 @@ Signed-off-by: Alexandre Frade include/net/tcp.h | 2 +- include/uapi/linux/inet_diag.h | 23 + net/ipv4/Kconfig | 21 +- - net/ipv4/tcp_bbr.c | 2217 +++++++++++++++++++++------- - 5 files changed, 1742 insertions(+), 525 deletions(-) + net/ipv4/tcp_bbr.c | 2211 +++++++++++++++++++++------- + 5 files changed, 1740 insertions(+), 521 deletions(-) --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h -@@ -137,8 +137,8 @@ struct inet_connection_sock { +@@ -132,8 +132,8 @@ struct inet_connection_sock { u32 icsk_probes_tstamp; u32 icsk_user_timeout; @@ -153,7 +153,7 @@ Signed-off-by: Alexandre Frade #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ --- a/include/net/tcp.h +++ b/include/net/tcp.h -@@ -2491,7 +2491,7 @@ struct tcp_plb_state { +@@ -2546,7 +2546,7 @@ struct tcp_plb_state { u8 consec_cong_rounds:5, /* consecutive congested rounds */ unused:3; u32 pause_until; /* jiffies32 when PLB can resume rerouting */ @@ -472,7 +472,7 @@ Signed-off-by: Alexandre Frade /* Gain factor for adding extra_acked to target cwnd: */ static const int bbr_extra_acked_gain = BBR_UNIT; /* Window length of extra_acked window. */ -@@ -201,8 +256,121 @@ static const u32 bbr_ack_epoch_acked_res +@@ -201,8 +256,123 @@ static const u32 bbr_ack_epoch_acked_res /* Time period for clamping cwnd increment due to ack aggregation */ static const u32 bbr_extra_acked_max_us = 100 * 1000; @@ -587,14 +587,16 @@ Signed-off-by: Alexandre Frade + */ +static bool bbr_can_use_ecn(const struct sock *sk) +{ -+ return (tcp_sk(sk)->ecn_flags & TCP_ECN_OK) && ++ struct tcp_sock *tp = tcp_sk(sk); ++ ++ return tcp_ecn_mode_any(tp) && + (tcp_sk(sk)->ecn_flags & TCP_ECN_LOW); +} + /* Do we estimate that STARTUP filled the pipe? */ static bool bbr_full_bw_reached(const struct sock *sk) { -@@ -214,17 +382,17 @@ static bool bbr_full_bw_reached(const st +@@ -214,17 +384,17 @@ static bool bbr_full_bw_reached(const st /* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ static u32 bbr_max_bw(const struct sock *sk) { @@ -616,7 +618,7 @@ Signed-off-by: Alexandre Frade } /* Return maximum extra acked in past k-2k round trips, -@@ -241,15 +409,23 @@ static u16 bbr_extra_acked(const struct +@@ -241,15 +411,23 @@ static u16 bbr_extra_acked(const struct * The order here is chosen carefully to avoid overflow of u64. This should * work for input rates of up to 2.9Tbit/sec and gain of 2.89x. */ @@ -643,7 +645,7 @@ Signed-off-by: Alexandre Frade } /* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */ -@@ -257,12 +433,13 @@ static unsigned long bbr_bw_to_pacing_ra +@@ -257,12 +435,13 @@ static unsigned long bbr_bw_to_pacing_ra { u64 rate = bw; @@ -659,7 +661,7 @@ Signed-off-by: Alexandre Frade static void bbr_init_pacing_rate_from_rtt(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); -@@ -279,7 +456,8 @@ static void bbr_init_pacing_rate_from_rt +@@ -279,7 +458,8 @@ static void bbr_init_pacing_rate_from_rt bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT; do_div(bw, rtt_us); WRITE_ONCE(sk->sk_pacing_rate, @@ -669,7 +671,7 @@ Signed-off-by: Alexandre Frade } /* Pace using current bw estimate and a gain factor. 
*/ -@@ -295,31 +473,38 @@ static void bbr_set_pacing_rate(struct s +@@ -295,31 +475,38 @@ static void bbr_set_pacing_rate(struct s WRITE_ONCE(sk->sk_pacing_rate, rate); } @@ -719,7 +721,7 @@ Signed-off-by: Alexandre Frade { return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size); } -@@ -329,7 +514,7 @@ static u32 bbr_tso_segs_goal(struct sock +@@ -329,7 +516,7 @@ static u32 bbr_tso_segs_goal(struct sock { struct tcp_sock *tp = tcp_sk(sk); @@ -728,7 +730,7 @@ Signed-off-by: Alexandre Frade } /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ -@@ -349,7 +534,9 @@ __bpf_kfunc static void bbr_cwnd_event(s +@@ -349,7 +536,9 @@ __bpf_kfunc static void bbr_cwnd_event(s struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); @@ -739,7 +741,7 @@ Signed-off-by: Alexandre Frade bbr->idle_restart = 1; bbr->ack_epoch_mstamp = tp->tcp_mstamp; bbr->ack_epoch_acked = 0; -@@ -360,6 +547,16 @@ __bpf_kfunc static void bbr_cwnd_event(s +@@ -360,6 +549,16 @@ __bpf_kfunc static void bbr_cwnd_event(s bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); else if (bbr->mode == BBR_PROBE_RTT) bbr_check_probe_rtt_done(sk); @@ -756,7 +758,7 @@ Signed-off-by: Alexandre Frade } } -@@ -382,10 +579,10 @@ static u32 bbr_bdp(struct sock *sk, u32 +@@ -382,10 +581,10 @@ static u32 bbr_bdp(struct sock *sk, u32 * default. This should only happen when the connection is not using TCP * timestamps and has retransmitted all of the SYN/SYNACK/data packets * ACKed so far. In this case, an RTO can cut cwnd to 1, in which @@ -769,7 +771,7 @@ Signed-off-by: Alexandre Frade w = (u64)bw * bbr->min_rtt_us; -@@ -402,23 +599,23 @@ static u32 bbr_bdp(struct sock *sk, u32 +@@ -402,23 +601,23 @@ static u32 bbr_bdp(struct sock *sk, u32 * - one skb in sending host Qdisc, * - one skb in sending host TSO/GSO engine * - one skb being received by receiver host LRO/GRO/delayed-ACK engine @@ -801,7 +803,7 @@ Signed-off-by: Alexandre Frade cwnd += 2; return cwnd; -@@ -473,10 +670,10 @@ static u32 bbr_ack_aggregation_cwnd(stru +@@ -473,10 +672,10 @@ static u32 bbr_ack_aggregation_cwnd(stru { u32 max_aggr_cwnd, aggr_cwnd = 0; @@ -814,7 +816,7 @@ Signed-off-by: Alexandre Frade >> BBR_SCALE; aggr_cwnd = min(aggr_cwnd, max_aggr_cwnd); } -@@ -484,66 +681,27 @@ static u32 bbr_ack_aggregation_cwnd(stru +@@ -484,66 +683,27 @@ static u32 bbr_ack_aggregation_cwnd(stru return aggr_cwnd; } @@ -888,7 +890,7 @@ Signed-off-by: Alexandre Frade target_cwnd = bbr_bdp(sk, bw, gain); /* Increment the cwnd to account for excess ACKed data that seems -@@ -552,74 +710,26 @@ static void bbr_set_cwnd(struct sock *sk +@@ -552,74 +712,26 @@ static void bbr_set_cwnd(struct sock *sk target_cwnd += bbr_ack_aggregation_cwnd(sk); target_cwnd = bbr_quantization_budget(sk, target_cwnd); @@ -980,7 +982,7 @@ Signed-off-by: Alexandre Frade } static void bbr_reset_startup_mode(struct sock *sk) -@@ -629,191 +739,49 @@ static void bbr_reset_startup_mode(struc +@@ -629,191 +741,49 @@ static void bbr_reset_startup_mode(struc bbr->mode = BBR_STARTUP; } @@ -1058,11 +1060,9 @@ Signed-off-by: Alexandre Frade + * returns packets delivered since previous round start plus this ACK. 
*/ -static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs) -+static u32 bbr_update_round_start(struct sock *sk, -+ const struct rate_sample *rs, struct bbr_context *ctx) - { - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); +-{ +- struct tcp_sock *tp = tcp_sk(sk); +- struct bbr *bbr = inet_csk_ca(sk); - u32 lost, delivered; - u64 bw; - u32 t; @@ -1133,9 +1133,11 @@ Signed-off-by: Alexandre Frade - -/* Estimate the bandwidth based on how fast packets are delivered */ -static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) --{ -- struct tcp_sock *tp = tcp_sk(sk); -- struct bbr *bbr = inet_csk_ca(sk); ++static u32 bbr_update_round_start(struct sock *sk, ++ const struct rate_sample *rs, struct bbr_context *ctx) + { + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); - u64 bw; + u32 round_delivered = 0; @@ -1196,7 +1198,7 @@ Signed-off-by: Alexandre Frade } /* Estimates the windowed max degree of ack aggregation. -@@ -827,7 +795,7 @@ static void bbr_update_bw(struct sock *s +@@ -827,7 +797,7 @@ static void bbr_update_bw(struct sock *s * * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms). * Max filter is an approximate sliding window of 5-10 (packet timed) round @@ -1205,7 +1207,7 @@ Signed-off-by: Alexandre Frade */ static void bbr_update_ack_aggregation(struct sock *sk, const struct rate_sample *rs) -@@ -835,15 +803,19 @@ static void bbr_update_ack_aggregation(s +@@ -835,15 +805,19 @@ static void bbr_update_ack_aggregation(s u32 epoch_us, expected_acked, extra_acked; struct bbr *bbr = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -1227,7 +1229,7 @@ Signed-off-by: Alexandre Frade bbr->extra_acked_win_rtts = 0; bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ? 0 : 1; -@@ -877,49 +849,6 @@ static void bbr_update_ack_aggregation(s +@@ -877,49 +851,6 @@ static void bbr_update_ack_aggregation(s bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked; } @@ -1277,7 +1279,7 @@ Signed-off-by: Alexandre Frade static void bbr_check_probe_rtt_done(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); -@@ -929,9 +858,9 @@ static void bbr_check_probe_rtt_done(str +@@ -929,9 +860,9 @@ static void bbr_check_probe_rtt_done(str after(tcp_jiffies32, bbr->probe_rtt_done_stamp))) return; @@ -1289,7 +1291,7 @@ Signed-off-by: Alexandre Frade } /* The goal of PROBE_RTT mode is to have BBR flows cooperatively and -@@ -957,23 +886,35 @@ static void bbr_update_min_rtt(struct so +@@ -957,23 +888,35 @@ static void bbr_update_min_rtt(struct so { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); @@ -1334,7 +1336,7 @@ Signed-off-by: Alexandre Frade } if (bbr->mode == BBR_PROBE_RTT) { -@@ -982,9 +923,9 @@ static void bbr_update_min_rtt(struct so +@@ -982,9 +925,9 @@ static void bbr_update_min_rtt(struct so (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; /* Maintain min packets in flight for max(200 ms, 1 round). 
*/ if (!bbr->probe_rtt_done_stamp && @@ -1346,7 +1348,7 @@ Signed-off-by: Alexandre Frade bbr->probe_rtt_round_done = 0; bbr->next_rtt_delivered = tp->delivered; } else if (bbr->probe_rtt_done_stamp) { -@@ -1005,18 +946,20 @@ static void bbr_update_gains(struct sock +@@ -1005,18 +948,20 @@ static void bbr_update_gains(struct sock switch (bbr->mode) { case BBR_STARTUP: @@ -1375,7 +1377,7 @@ Signed-off-by: Alexandre Frade break; case BBR_PROBE_RTT: bbr->pacing_gain = BBR_UNIT; -@@ -1028,27 +971,1108 @@ static void bbr_update_gains(struct sock +@@ -1028,27 +973,1108 @@ static void bbr_update_gains(struct sock } } @@ -1435,7 +1437,8 @@ Signed-off-by: Alexandre Frade + +/* Has the given amount of time elapsed since we marked the phase start? */ +static bool bbr_has_elapsed_in_phase(const struct sock *sk, u32 interval_us) -+{ + { +- bbr_update_bw(sk, rs); + const struct tcp_sock *tp = tcp_sk(sk); + const struct bbr *bbr = inet_csk_ca(sk); + @@ -1456,8 +1459,7 @@ Signed-off-by: Alexandre Frade + +/* Exit STARTUP upon N consecutive rounds with ECN mark rate > ecn_thresh. */ +static void bbr_check_ecn_too_high_in_startup(struct sock *sk, u32 ce_ratio) - { -- bbr_update_bw(sk, rs); ++{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr_full_bw_reached(sk) || !bbr->ecn_eligible || @@ -2494,7 +2496,7 @@ Signed-off-by: Alexandre Frade } __bpf_kfunc static void bbr_init(struct sock *sk) -@@ -1056,20 +2080,21 @@ __bpf_kfunc static void bbr_init(struct +@@ -1056,20 +2082,21 @@ __bpf_kfunc static void bbr_init(struct struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); @@ -2521,7 +2523,7 @@ Signed-off-by: Alexandre Frade bbr->has_seen_rtt = 0; bbr_init_pacing_rate_from_rtt(sk); -@@ -1080,7 +2105,7 @@ __bpf_kfunc static void bbr_init(struct +@@ -1080,7 +2107,7 @@ __bpf_kfunc static void bbr_init(struct bbr->full_bw_cnt = 0; bbr->cycle_mstamp = 0; bbr->cycle_idx = 0; @@ -2530,7 +2532,7 @@ Signed-off-by: Alexandre Frade bbr_reset_startup_mode(sk); bbr->ack_epoch_mstamp = tp->tcp_mstamp; -@@ -1090,78 +2115,236 @@ __bpf_kfunc static void bbr_init(struct +@@ -1090,78 +2117,236 @@ __bpf_kfunc static void bbr_init(struct bbr->extra_acked[0] = 0; bbr->extra_acked[1] = 0; @@ -2795,7 +2797,7 @@ Signed-off-by: Alexandre Frade .undo_cwnd = bbr_undo_cwnd, .cwnd_event = bbr_cwnd_event, .ssthresh = bbr_ssthresh, -@@ -1174,10 +2357,11 @@ BTF_KFUNCS_START(tcp_bbr_check_kfunc_ids +@@ -1174,10 +2359,11 @@ BTF_KFUNCS_START(tcp_bbr_check_kfunc_ids BTF_ID_FLAGS(func, bbr_init) BTF_ID_FLAGS(func, bbr_main) BTF_ID_FLAGS(func, bbr_sndbuf_expand) @@ -2808,7 +2810,7 @@ Signed-off-by: Alexandre Frade BTF_ID_FLAGS(func, bbr_set_state) BTF_KFUNCS_END(tcp_bbr_check_kfunc_ids) -@@ -1210,5 +2394,12 @@ MODULE_AUTHOR("Van Jacobson "); MODULE_AUTHOR("Yuchung Cheng "); MODULE_AUTHOR("Soheil Hassas Yeganeh "); diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0017-net-tcp_bbr-v3-ensure-ECN-enabled-BBR-flows-set-ECT-.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0017-net-tcp_bbr-v3-ensure-ECN-enabled-BBR-flows-set-ECT-.patch index 3f028e5..95db621 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0017-net-tcp_bbr-v3-ensure-ECN-enabled-BBR-flows-set-ECT-.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0017-net-tcp_bbr-v3-ensure-ECN-enabled-BBR-flows-set-ECT-.patch @@ -1,7 +1,7 @@ -From 79dbc43c63d17b05e0b04c6ed68b5e24515cfe2f Mon Sep 17 00:00:00 2001 +From 45615e9d62a6b0b688a284dc712c243295df9e65 Mon Sep 17 00:00:00 2001 From: Adithya Abraham Philip Date: Fri, 11 Jun 2021 21:56:10 +0000 -Subject: 
net-tcp_bbr: v3: ensure ECN-enabled BBR flows set ECT +Subject: [PATCH 17/18] net-tcp_bbr: v3: ensure ECN-enabled BBR flows set ECT on retransmits Adds a new flag TCP_ECN_ECT_PERMANENT that is used by CCAs to @@ -25,17 +25,17 @@ Signed-off-by: Alexandre Frade --- a/include/net/tcp.h +++ b/include/net/tcp.h -@@ -377,6 +377,7 @@ static inline void tcp_dec_quickack_mode - #define TCP_ECN_DEMAND_CWR 4 - #define TCP_ECN_SEEN 8 - #define TCP_ECN_LOW 16 -+#define TCP_ECN_ECT_PERMANENT 32 +@@ -380,6 +380,7 @@ static inline void tcp_dec_quickack_mode + #define TCP_ECN_SEEN BIT(3) + #define TCP_ECN_MODE_ACCECN BIT(4) + #define TCP_ECN_LOW BIT(5) ++#define TCP_ECN_ECT_PERMANENT BIT(6) - enum tcp_tw_status { - TCP_TW_SUCCESS = 0, + #define TCP_ECN_DISABLED 0 + #define TCP_ECN_MODE_PENDING (TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN) --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c -@@ -2152,6 +2152,9 @@ __bpf_kfunc static void bbr_init(struct +@@ -2154,6 +2154,9 @@ __bpf_kfunc static void bbr_init(struct bbr->plb.pause_until = 0; tp->fast_ack_mode = bbr_fast_ack_mode ? 1 : 0; diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0018-tcp-export-TCPI_OPT_ECN_LOW-in-tcp_info-tcpi_options.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0018-tcp-export-TCPI_OPT_ECN_LOW-in-tcp_info-tcpi_options.patch index 03785e1..1cd309b 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0018-tcp-export-TCPI_OPT_ECN_LOW-in-tcp_info-tcpi_options.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0018-tcp-export-TCPI_OPT_ECN_LOW-in-tcp_info-tcpi_options.patch @@ -1,7 +1,7 @@ -From 74f5a9e717fb41742cf30802e9f9c55c001d2576 Mon Sep 17 00:00:00 2001 +From 461bd4bd12039886127019682ba6e7f372d9fa0b Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sun, 23 Jul 2023 23:25:34 -0400 -Subject: tcp: export TCPI_OPT_ECN_LOW in tcp_info tcpi_options +Subject: [PATCH 18/18] tcp: export TCPI_OPT_ECN_LOW in tcp_info tcpi_options field Analogous to other important ECN information, export TCPI_OPT_ECN_LOW @@ -17,7 +17,7 @@ Signed-off-by: Alexandre Frade --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h -@@ -178,6 +178,7 @@ enum tcp_fastopen_client_fail { +@@ -184,6 +184,7 @@ enum tcp_fastopen_client_fail { #define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */ #define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ #define TCPI_OPT_USEC_TS 64 /* usec timestamps */ @@ -27,7 +27,7 @@ Signed-off-by: Alexandre Frade * Sender's congestion state indicating normal or abnormal situations --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c -@@ -4125,6 +4125,8 @@ void tcp_get_info(struct sock *sk, struc +@@ -4159,6 +4159,8 @@ void tcp_get_info(struct sock *sk, struc info->tcpi_options |= TCPI_OPT_ECN; if (tp->ecn_flags & TCP_ECN_SEEN) info->tcpi_options |= TCPI_OPT_ECN_SEEN; diff --git a/debian/patches/patchset-xanmod/net/tcp/cloudflare/0001-tcp-Add-a-sysctl-to-skip-tcp-collapse-processing-whe.patch b/debian/patches/patchset-xanmod/net/tcp/cloudflare/0001-tcp-Add-a-sysctl-to-skip-tcp-collapse-processing-whe.patch index cf194d4..72cfd44 100644 --- a/debian/patches/patchset-xanmod/net/tcp/cloudflare/0001-tcp-Add-a-sysctl-to-skip-tcp-collapse-processing-whe.patch +++ b/debian/patches/patchset-xanmod/net/tcp/cloudflare/0001-tcp-Add-a-sysctl-to-skip-tcp-collapse-processing-whe.patch @@ -1,4 +1,4 @@ -From 2b4dc54edd1589e720e5b27e4536fd549c31f34e Mon Sep 17 00:00:00 2001 +From 1b7e9ad0803cef8cf087bb67a6e4c8d63a02405b Mon Sep 17 00:00:00 2001 From: "mfreemon@cloudflare.com" Date: Tue, 1 Mar 2022 
17:06:02 -0600 Subject: [PATCH] tcp: Add a sysctl to skip tcp collapse processing when the @@ -41,7 +41,7 @@ Signed-off-by: Alexandre Frade --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h -@@ -226,6 +226,7 @@ struct netns_ipv4 { +@@ -230,6 +230,7 @@ struct netns_ipv4 { u8 sysctl_fib_notify_on_flag_change; u8 sysctl_tcp_syn_linear_timeouts; @@ -67,7 +67,7 @@ Signed-off-by: Alexandre Frade TP_PROTO(const struct sock *sk, const struct request_sock *req), --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c -@@ -1568,6 +1568,13 @@ static struct ctl_table ipv4_net_table[] +@@ -1569,6 +1569,13 @@ static struct ctl_table ipv4_net_table[] .extra2 = SYSCTL_ONE, }, { @@ -83,7 +83,7 @@ Signed-off-by: Alexandre Frade .maxlen = sizeof(u8), --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c -@@ -5672,6 +5672,7 @@ static bool tcp_prune_ofo_queue(struct s +@@ -5682,6 +5682,7 @@ static bool tcp_prune_ofo_queue(struct s static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -91,7 +91,7 @@ Signed-off-by: Alexandre Frade NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED); -@@ -5683,6 +5684,39 @@ static int tcp_prune_queue(struct sock * +@@ -5693,6 +5694,39 @@ static int tcp_prune_queue(struct sock * if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) return 0; @@ -131,7 +131,7 @@ Signed-off-by: Alexandre Frade tcp_collapse_ofo_queue(sk); if (!skb_queue_empty(&sk->sk_receive_queue)) tcp_collapse(sk, &sk->sk_receive_queue, NULL, -@@ -5701,6 +5735,8 @@ static int tcp_prune_queue(struct sock * +@@ -5711,6 +5745,8 @@ static int tcp_prune_queue(struct sock * if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) return 0; @@ -142,7 +142,7 @@ Signed-off-by: Alexandre Frade * and hopefully then we'll have sufficient space. --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c -@@ -3530,6 +3530,7 @@ static int __net_init tcp_sk_init(struct +@@ -3541,6 +3541,7 @@ static int __net_init tcp_sk_init(struct net->ipv4.sysctl_tcp_syn_linear_timeouts = 4; net->ipv4.sysctl_tcp_shrink_window = 0; diff --git a/debian/patches/patchset-xanmod/pci_acso/0001-PCI-Enable-overrides-for-missing-ACS-capabilities.patch b/debian/patches/patchset-xanmod/pci_acso/0001-PCI-Enable-overrides-for-missing-ACS-capabilities.patch index 3c21789..872ede6 100644 --- a/debian/patches/patchset-xanmod/pci_acso/0001-PCI-Enable-overrides-for-missing-ACS-capabilities.patch +++ b/debian/patches/patchset-xanmod/pci_acso/0001-PCI-Enable-overrides-for-missing-ACS-capabilities.patch @@ -1,4 +1,4 @@ -From 2eb935c59e24cc1303dcb7153261be0a1b61b38b Mon Sep 17 00:00:00 2001 +From c98d1c0e1f4b119313eb5852ccbf14b748c5d4a4 Mon Sep 17 00:00:00 2001 From: Mark Weiman Date: Sun, 12 Aug 2018 11:36:21 -0400 Subject: [PATCH] PCI: Enable overrides for missing ACS capabilities @@ -55,7 +55,7 @@ Signed-off-by: Alexandre Frade --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -4664,6 +4664,15 @@ +@@ -4691,6 +4691,15 @@ nomsi [MSI] If the PCI_MSI kernel config parameter is enabled, this kernel boot option can be used to disable the use of MSI interrupts system-wide. 
diff --git a/debian/patches/patchset-xanmod/valve/0001-extcon-Add-driver-for-Steam-Deck.patch b/debian/patches/patchset-xanmod/valve/0001-extcon-Add-driver-for-Steam-Deck.patch index 2eda7a9..15d40fe 100644 --- a/debian/patches/patchset-xanmod/valve/0001-extcon-Add-driver-for-Steam-Deck.patch +++ b/debian/patches/patchset-xanmod/valve/0001-extcon-Add-driver-for-Steam-Deck.patch @@ -1,7 +1,7 @@ -From cd6bf6bb5fd26e58638aa441dacd9104eb990fe5 Mon Sep 17 00:00:00 2001 +From 14d0907ef9d3f5c708d6aff478e32c64cda3d488 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Sun, 27 Feb 2022 14:46:08 -0800 -Subject: extcon: Add driver for Steam Deck +Subject: [PATCH 1/6] extcon: Add driver for Steam Deck (cherry picked from commit f9f2eddae582ae39d5f89c1218448fc259b90aa8) Signed-off-by: Cristian Ciocaltea diff --git a/debian/patches/patchset-xanmod/valve/0002-hwmon-Add-driver-for-Steam-Deck-s-EC-sensors.patch b/debian/patches/patchset-xanmod/valve/0002-hwmon-Add-driver-for-Steam-Deck-s-EC-sensors.patch index 69cdd0c..1786d69 100644 --- a/debian/patches/patchset-xanmod/valve/0002-hwmon-Add-driver-for-Steam-Deck-s-EC-sensors.patch +++ b/debian/patches/patchset-xanmod/valve/0002-hwmon-Add-driver-for-Steam-Deck-s-EC-sensors.patch @@ -1,7 +1,7 @@ -From c4da1a4d0efa203d10fdceda267816f7838c8a85 Mon Sep 17 00:00:00 2001 +From 3c5ff39975ce84f9d395349445a8742d58f16a20 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Sat, 19 Feb 2022 16:09:45 -0800 -Subject: hwmon: Add driver for Steam Deck's EC sensors +Subject: [PATCH 2/6] hwmon: Add driver for Steam Deck's EC sensors Add driver for sensors exposed by EC firmware on Steam Deck hardware. @@ -17,7 +17,7 @@ Signed-off-by: Alexandre Frade --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig -@@ -2089,6 +2089,17 @@ config SENSORS_SCH5636 +@@ -2110,6 +2110,17 @@ config SENSORS_SCH5636 This driver can also be built as a module. If so, the module will be called sch5636. @@ -37,7 +37,7 @@ Signed-off-by: Alexandre Frade depends on I2C --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile -@@ -211,6 +211,7 @@ obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47 +@@ -213,6 +213,7 @@ obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47 obj-$(CONFIG_SENSORS_SMSC47M192)+= smsc47m192.o obj-$(CONFIG_SENSORS_SPARX5) += sparx5-temp.o obj-$(CONFIG_SENSORS_SPD5118) += spd5118.o diff --git a/debian/patches/patchset-xanmod/valve/0003-hwmon-steamdeck-hwmon-Add-support-for-max-battery-le.patch b/debian/patches/patchset-xanmod/valve/0003-hwmon-steamdeck-hwmon-Add-support-for-max-battery-le.patch index da70213..f17b5bc 100644 --- a/debian/patches/patchset-xanmod/valve/0003-hwmon-steamdeck-hwmon-Add-support-for-max-battery-le.patch +++ b/debian/patches/patchset-xanmod/valve/0003-hwmon-steamdeck-hwmon-Add-support-for-max-battery-le.patch @@ -1,7 +1,7 @@ -From 9f7d5453fd576ddf2c810146c5f61863b52d777d Mon Sep 17 00:00:00 2001 +From 5e06cdcc7f6bf61b94a61f5b421573d2e12c0575 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Sat, 15 Jul 2023 12:58:54 -0700 -Subject: hwmon: steamdeck-hwmon: Add support for max battery +Subject: [PATCH 3/6] hwmon: steamdeck-hwmon: Add support for max battery level/rate Add support for max battery level/charge rate attributes. 
diff --git a/debian/patches/patchset-xanmod/valve/0004-leds-steamdeck-Add-support-for-Steam-Deck-LED.patch b/debian/patches/patchset-xanmod/valve/0004-leds-steamdeck-Add-support-for-Steam-Deck-LED.patch index e852b8c..d356aac 100644 --- a/debian/patches/patchset-xanmod/valve/0004-leds-steamdeck-Add-support-for-Steam-Deck-LED.patch +++ b/debian/patches/patchset-xanmod/valve/0004-leds-steamdeck-Add-support-for-Steam-Deck-LED.patch @@ -1,7 +1,7 @@ -From 93fc97eeb7fd11b7da124eab29c8d455331d364c Mon Sep 17 00:00:00 2001 +From a73fb7a13f21fdee3ffe8f900f32b7f3a325e60a Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Sun, 27 Feb 2022 12:58:05 -0800 -Subject: leds: steamdeck: Add support for Steam Deck LED +Subject: [PATCH 4/6] leds: steamdeck: Add support for Steam Deck LED (cherry picked from commit 85a86d19aa7022ff0555023d53aef78323a42d0c) Signed-off-by: Cristian Ciocaltea @@ -15,7 +15,7 @@ Signed-off-by: Alexandre Frade --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig -@@ -1004,6 +1004,13 @@ config LEDS_ACER_A500 +@@ -1013,6 +1013,13 @@ config LEDS_ACER_A500 This option enables support for the Power Button LED of Acer Iconia Tab A500. @@ -31,7 +31,7 @@ Signed-off-by: Alexandre Frade comment "Flash and Torch LED drivers" --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile -@@ -84,6 +84,7 @@ obj-$(CONFIG_LEDS_QNAP_MCU) += leds-qna +@@ -85,6 +85,7 @@ obj-$(CONFIG_LEDS_QNAP_MCU) += leds-qna obj-$(CONFIG_LEDS_REGULATOR) += leds-regulator.o obj-$(CONFIG_LEDS_SC27XX_BLTC) += leds-sc27xx-bltc.o obj-$(CONFIG_LEDS_ST1202) += leds-st1202.o diff --git a/debian/patches/patchset-xanmod/valve/0005-mfd-Add-MFD-core-driver-for-Steam-Deck.patch b/debian/patches/patchset-xanmod/valve/0005-mfd-Add-MFD-core-driver-for-Steam-Deck.patch index 52347cf..113d8e1 100644 --- a/debian/patches/patchset-xanmod/valve/0005-mfd-Add-MFD-core-driver-for-Steam-Deck.patch +++ b/debian/patches/patchset-xanmod/valve/0005-mfd-Add-MFD-core-driver-for-Steam-Deck.patch @@ -1,7 +1,7 @@ -From 544af2c7ba194f959e8b317efb6e82b229b8ceff Mon Sep 17 00:00:00 2001 +From eaf78d7b957552deba7222ca6bd1dae28bdd420b Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Sat, 19 Feb 2022 16:08:36 -0800 -Subject: mfd: Add MFD core driver for Steam Deck +Subject: [PATCH 5/6] mfd: Add MFD core driver for Steam Deck Add MFD core driver for Steam Deck. Doesn't really do much so far besides instantiating a number of MFD cells that implement all the @@ -19,7 +19,7 @@ Signed-off-by: Alexandre Frade --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig -@@ -2439,5 +2439,16 @@ config MFD_UPBOARD_FPGA +@@ -2422,5 +2422,16 @@ config MFD_UPBOARD_FPGA To compile this driver as a module, choose M here: the module will be called upboard-fpga. 
@@ -38,7 +38,7 @@ Signed-off-by: Alexandre Frade endif --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile -@@ -294,3 +294,5 @@ obj-$(CONFIG_MFD_RSMU_I2C) += rsmu_i2c.o +@@ -290,3 +290,5 @@ obj-$(CONFIG_MFD_RSMU_I2C) += rsmu_i2c.o obj-$(CONFIG_MFD_RSMU_SPI) += rsmu_spi.o rsmu_core.o obj-$(CONFIG_MFD_UPBOARD_FPGA) += upboard-fpga.o diff --git a/debian/patches/patchset-xanmod/valve/0006-mfd-steamdeck-Expose-controller-board-power-in-sysfs.patch b/debian/patches/patchset-xanmod/valve/0006-mfd-steamdeck-Expose-controller-board-power-in-sysfs.patch index 421bdf9..c877efe 100644 --- a/debian/patches/patchset-xanmod/valve/0006-mfd-steamdeck-Expose-controller-board-power-in-sysfs.patch +++ b/debian/patches/patchset-xanmod/valve/0006-mfd-steamdeck-Expose-controller-board-power-in-sysfs.patch @@ -1,7 +1,7 @@ -From cf5a7be3ab145c5743b673722ce01002dcdac3e6 Mon Sep 17 00:00:00 2001 +From 1755d1224560a5d3379489cf83efefdf6fd7d93c Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Sun, 24 Sep 2023 15:02:33 -0700 -Subject: mfd: steamdeck: Expose controller board power in sysfs +Subject: [PATCH 6/6] mfd: steamdeck: Expose controller board power in sysfs As of version 118 Deck's BIOS implements "SCBP" method that allows gating power of the controller board (VBUS). Add a basic WO method to diff --git a/debian/patches/patchset-xanmod/xanmod/0001-kbuild-Re-add-.config-file-required-to-sign-external.patch b/debian/patches/patchset-xanmod/xanmod/0001-kbuild-Re-add-.config-file-required-to-sign-external.patch index 6a1398a..aa0336e 100644 --- a/debian/patches/patchset-xanmod/xanmod/0001-kbuild-Re-add-.config-file-required-to-sign-external.patch +++ b/debian/patches/patchset-xanmod/xanmod/0001-kbuild-Re-add-.config-file-required-to-sign-external.patch @@ -1,7 +1,7 @@ -From 878cd0d9982ee6810036adce9e9c96cdb3714be1 Mon Sep 17 00:00:00 2001 +From 7479efa37dfb05263e0984ca1e1a3da22fa62414 Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Thu, 28 Nov 2024 22:55:27 +0000 -Subject: kbuild: Re-add .config file required to sign external +Subject: [PATCH 03/19] kbuild: Re-add .config file required to sign external modules Signed-off-by: Alexandre Frade diff --git a/debian/patches/patchset-xanmod/xanmod/0002-kbuild-Remove-GCC-minimal-function-alignment.patch b/debian/patches/patchset-xanmod/xanmod/0002-kbuild-Remove-GCC-minimal-function-alignment.patch index 82192ef..90961ca 100644 --- a/debian/patches/patchset-xanmod/xanmod/0002-kbuild-Remove-GCC-minimal-function-alignment.patch +++ b/debian/patches/patchset-xanmod/xanmod/0002-kbuild-Remove-GCC-minimal-function-alignment.patch @@ -1,7 +1,7 @@ -From 6e1157f40aa2de736b79766c53f87dfe7de36bb5 Mon Sep 17 00:00:00 2001 +From 0d678f81894ace50347c6223255b8263161299fe Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Sat, 31 Aug 2024 16:57:41 +0000 -Subject: kbuild: Remove GCC minimal function alignment +Subject: [PATCH 04/19] kbuild: Remove GCC minimal function alignment Signed-off-by: Alexandre Frade --- @@ -12,7 +12,7 @@ Signed-off-by: Alexandre Frade --- a/Makefile +++ b/Makefile -@@ -1055,15 +1055,8 @@ export CC_FLAGS_FPU +@@ -1058,15 +1058,8 @@ export CC_FLAGS_FPU export CC_FLAGS_NO_FPU ifneq ($(CONFIG_FUNCTION_ALIGNMENT),0) @@ -30,7 +30,7 @@ Signed-off-by: Alexandre Frade NOSTDINC_FLAGS += -nostdinc --- a/arch/Kconfig +++ b/arch/Kconfig -@@ -1723,18 +1723,6 @@ config FUNCTION_ALIGNMENT +@@ -1734,18 +1734,6 @@ config FUNCTION_ALIGNMENT default 4 if FUNCTION_ALIGNMENT_4B default 0 diff --git 
a/debian/patches/patchset-xanmod/xanmod/0003-XANMOD-fair-Set-scheduler-tunable-latencies-to-unsca.patch b/debian/patches/patchset-xanmod/xanmod/0003-XANMOD-fair-Set-scheduler-tunable-latencies-to-unsca.patch index c7637f4..2d880ef 100644 --- a/debian/patches/patchset-xanmod/xanmod/0003-XANMOD-fair-Set-scheduler-tunable-latencies-to-unsca.patch +++ b/debian/patches/patchset-xanmod/xanmod/0003-XANMOD-fair-Set-scheduler-tunable-latencies-to-unsca.patch @@ -1,7 +1,7 @@ -From 91f0f89ac5315be99ea1aea5d732c68311f68bda Mon Sep 17 00:00:00 2001 +From 3cd805916cf93d70ef73a006ed54c737c1bb44ca Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Thu, 11 May 2023 19:41:41 +0000 -Subject: XANMOD: fair: Set scheduler tunable latencies to +Subject: [PATCH 05/19] XANMOD: fair: Set scheduler tunable latencies to unscaled Signed-off-by: Alexandre Frade diff --git a/debian/patches/patchset-xanmod/xanmod/0004-XANMOD-sched-Add-yield_type-sysctl-to-reduce-or-disa.patch b/debian/patches/patchset-xanmod/xanmod/0004-XANMOD-sched-Add-yield_type-sysctl-to-reduce-or-disa.patch index fb1d8c4..680dd01 100644 --- a/debian/patches/patchset-xanmod/xanmod/0004-XANMOD-sched-Add-yield_type-sysctl-to-reduce-or-disa.patch +++ b/debian/patches/patchset-xanmod/xanmod/0004-XANMOD-sched-Add-yield_type-sysctl-to-reduce-or-disa.patch @@ -1,14 +1,14 @@ -From 5a126e141df4850073a8f057cc5eeb22e8f6ea57 Mon Sep 17 00:00:00 2001 +From fa6afaf41316657a46bc70c9e942051e15e837fd Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Sun, 15 Sep 2024 23:03:38 +0000 -Subject: XANMOD: sched: Add yield_type sysctl to reduce or +Subject: [PATCH 06/19] XANMOD: sched: Add yield_type sysctl to reduce or disable sched_yield Signed-off-by: Alexandre Frade --- kernel/sched/syscalls.c | 16 +++++++++++++++- - kernel/sysctl.c | 10 ++++++++++ - 2 files changed, 25 insertions(+), 1 deletion(-) + kernel/sysctl.c | 11 +++++++++++ + 2 files changed, 26 insertions(+), 1 deletion(-) --- a/kernel/sched/syscalls.c +++ b/kernel/sched/syscalls.c @@ -45,15 +45,16 @@ Signed-off-by: Alexandre Frade rq_unlock_irq(rq, &rf); --- a/kernel/sysctl.c +++ b/kernel/sysctl.c -@@ -97,6 +97,7 @@ static const int six_hundred_forty_kb = - #endif +@@ -80,6 +80,8 @@ EXPORT_SYMBOL_GPL(sysctl_long_vals); + #if defined(CONFIG_SYSCTL) +extern int sysctl_sched_yield_type; ++ + /* Constants used for minimum and maximum */ static const int ngroups_max = NGROUPS_MAX; static const int cap_last_cap = CAP_LAST_CAP; - -@@ -1630,6 +1631,15 @@ static const struct ctl_table kern_table +@@ -1608,6 +1610,15 @@ static const struct ctl_table kern_table .proc_handler = proc_dointvec, }, #endif diff --git a/debian/patches/patchset-xanmod/xanmod/0005-XANMOD-block-mq-deadline-Increase-write-priority-to-.patch b/debian/patches/patchset-xanmod/xanmod/0005-XANMOD-block-mq-deadline-Increase-write-priority-to-.patch index 768c7e6..4d719ee 100644 --- a/debian/patches/patchset-xanmod/xanmod/0005-XANMOD-block-mq-deadline-Increase-write-priority-to-.patch +++ b/debian/patches/patchset-xanmod/xanmod/0005-XANMOD-block-mq-deadline-Increase-write-priority-to-.patch @@ -1,7 +1,7 @@ -From f91c466320368433d644a1bbaeb303b682c6b7d1 Mon Sep 17 00:00:00 2001 +From 5dc5d7a3a1c25cd5d7c2079bbe56ff7c0066c76a Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Wed, 11 May 2022 18:56:51 +0000 -Subject: XANMOD: block/mq-deadline: Increase write priority to +Subject: [PATCH 07/19] XANMOD: block/mq-deadline: Increase write priority to improve responsiveness Signed-off-by: Alexandre Frade diff --git 
a/debian/patches/patchset-xanmod/xanmod/0006-XANMOD-block-mq-deadline-Disable-front_merges-by-def.patch b/debian/patches/patchset-xanmod/xanmod/0006-XANMOD-block-mq-deadline-Disable-front_merges-by-def.patch index f1fd3a3..4234564 100644 --- a/debian/patches/patchset-xanmod/xanmod/0006-XANMOD-block-mq-deadline-Disable-front_merges-by-def.patch +++ b/debian/patches/patchset-xanmod/xanmod/0006-XANMOD-block-mq-deadline-Disable-front_merges-by-def.patch @@ -1,7 +1,7 @@ -From 99aceb32885686182f2e38ed6c19a380828249b7 Mon Sep 17 00:00:00 2001 +From 83f38053e977907d085d7f27a24f1b2844a03f1c Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Thu, 6 Jan 2022 16:59:01 +0000 -Subject: XANMOD: block/mq-deadline: Disable front_merges by +Subject: [PATCH 08/19] XANMOD: block/mq-deadline: Disable front_merges by default Signed-off-by: Alexandre Frade diff --git a/debian/patches/patchset-xanmod/xanmod/0007-XANMOD-block-Set-rq_affinity-to-force-complete-I-O-r.patch b/debian/patches/patchset-xanmod/xanmod/0007-XANMOD-block-Set-rq_affinity-to-force-complete-I-O-r.patch index 41191af..7ce12cd 100644 --- a/debian/patches/patchset-xanmod/xanmod/0007-XANMOD-block-Set-rq_affinity-to-force-complete-I-O-r.patch +++ b/debian/patches/patchset-xanmod/xanmod/0007-XANMOD-block-Set-rq_affinity-to-force-complete-I-O-r.patch @@ -1,7 +1,7 @@ -From e664c30c44caccc43b50a7cde90d4ad2a57faef2 Mon Sep 17 00:00:00 2001 +From 6c75a84f9f89c848e76650cc66672246fa62843f Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Mon, 16 Sep 2024 15:36:01 +0000 -Subject: XANMOD: block: Set rq_affinity to force complete I/O +Subject: [PATCH 09/19] XANMOD: block: Set rq_affinity to force complete I/O requests on same CPU Signed-off-by: Alexandre Frade @@ -11,7 +11,7 @@ Signed-off-by: Alexandre Frade --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h -@@ -626,7 +626,8 @@ enum { +@@ -647,7 +647,8 @@ enum { QUEUE_FLAG_MAX }; diff --git a/debian/patches/patchset-xanmod/xanmod/0008-XANMOD-blk-wbt-Set-wbt_default_latency_nsec-to-2msec.patch b/debian/patches/patchset-xanmod/xanmod/0008-XANMOD-blk-wbt-Set-wbt_default_latency_nsec-to-2msec.patch index fd867a7..5d5e95e 100644 --- a/debian/patches/patchset-xanmod/xanmod/0008-XANMOD-blk-wbt-Set-wbt_default_latency_nsec-to-2msec.patch +++ b/debian/patches/patchset-xanmod/xanmod/0008-XANMOD-blk-wbt-Set-wbt_default_latency_nsec-to-2msec.patch @@ -1,7 +1,7 @@ -From 34db71a0c7669de56fb221bacb4955012f52efa8 Mon Sep 17 00:00:00 2001 +From 287c275293025e956f2144e55de8cc51eec0811b Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Mon, 15 Jul 2024 04:50:34 +0000 -Subject: XANMOD: blk-wbt: Set wbt_default_latency_nsec() to +Subject: [PATCH 10/19] XANMOD: blk-wbt: Set wbt_default_latency_nsec() to 2msec Signed-off-by: Alexandre Frade @@ -11,7 +11,7 @@ Signed-off-by: Alexandre Frade --- a/block/blk-wbt.c +++ b/block/blk-wbt.c -@@ -730,14 +730,8 @@ EXPORT_SYMBOL_GPL(wbt_enable_default); +@@ -727,14 +727,8 @@ EXPORT_SYMBOL_GPL(wbt_enable_default); u64 wbt_default_latency_nsec(struct request_queue *q) { diff --git a/debian/patches/patchset-xanmod/xanmod/0009-XANMOD-kconfig-add-500Hz-timer-interrupt-kernel-conf.patch b/debian/patches/patchset-xanmod/xanmod/0009-XANMOD-kconfig-add-500Hz-timer-interrupt-kernel-conf.patch index 7525e40..aa45346 100644 --- a/debian/patches/patchset-xanmod/xanmod/0009-XANMOD-kconfig-add-500Hz-timer-interrupt-kernel-conf.patch +++ b/debian/patches/patchset-xanmod/xanmod/0009-XANMOD-kconfig-add-500Hz-timer-interrupt-kernel-conf.patch @@ -1,7 +1,7 @@ -From 
6f6902c8942b881988088c7f7d61053b41f00f0a Mon Sep 17 00:00:00 2001 +From 25c1d0ad74a27ac80dbda2840eba4fe53046ed55 Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Mon, 29 Jan 2018 17:26:15 +0000 -Subject: XANMOD: kconfig: add 500Hz timer interrupt kernel +Subject: [PATCH 11/19] XANMOD: kconfig: add 500Hz timer interrupt kernel config option Signed-off-by: Alexandre Frade diff --git a/debian/patches/patchset-xanmod/xanmod/0010-XANMOD-dcache-cache_pressure-50-decreases-the-rate-a.patch b/debian/patches/patchset-xanmod/xanmod/0010-XANMOD-dcache-cache_pressure-50-decreases-the-rate-a.patch index bae2bbe..bffd409 100644 --- a/debian/patches/patchset-xanmod/xanmod/0010-XANMOD-dcache-cache_pressure-50-decreases-the-rate-a.patch +++ b/debian/patches/patchset-xanmod/xanmod/0010-XANMOD-dcache-cache_pressure-50-decreases-the-rate-a.patch @@ -1,7 +1,7 @@ -From 269ed90bb0c714fc237be05611c82804f81b7038 Mon Sep 17 00:00:00 2001 +From 185191cd2a98629f35cb5cd6c0116ceb33635dd8 Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Mon, 29 Jan 2018 16:59:22 +0000 -Subject: XANMOD: dcache: cache_pressure = 50 decreases the rate +Subject: [PATCH 12/19] XANMOD: dcache: cache_pressure = 50 decreases the rate at which VFS caches are reclaimed Signed-off-by: Alexandre Frade @@ -15,8 +15,8 @@ Signed-off-by: Alexandre Frade * If no ancestor relationship: * arbitrary, since it's serialized on rename_lock */ --int sysctl_vfs_cache_pressure __read_mostly = 100; -+int sysctl_vfs_cache_pressure __read_mostly = 50; - EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); +-static int sysctl_vfs_cache_pressure __read_mostly = 100; ++static int sysctl_vfs_cache_pressure __read_mostly = 50; - __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); + unsigned long vfs_pressure_ratio(unsigned long val) + { diff --git a/debian/patches/patchset-xanmod/xanmod/0011-XANMOD-mm-Raise-max_map_count-default-value.patch b/debian/patches/patchset-xanmod/xanmod/0011-XANMOD-mm-Raise-max_map_count-default-value.patch index 8a120b0..41c0e97 100644 --- a/debian/patches/patchset-xanmod/xanmod/0011-XANMOD-mm-Raise-max_map_count-default-value.patch +++ b/debian/patches/patchset-xanmod/xanmod/0011-XANMOD-mm-Raise-max_map_count-default-value.patch @@ -1,7 +1,7 @@ -From ba310efa15e3c9677121c31e79b72695bcca87df Mon Sep 17 00:00:00 2001 +From 653701587608c8113dd4c941526104cea83d697e Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Sun, 28 Apr 2024 09:06:54 +0000 -Subject: XANMOD: mm: Raise max_map_count default value +Subject: [PATCH 13/19] XANMOD: mm: Raise max_map_count default value Signed-off-by: Alexandre Frade --- @@ -11,7 +11,7 @@ Signed-off-by: Alexandre Frade --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst -@@ -461,7 +461,7 @@ While most applications need less than a +@@ -470,7 +470,7 @@ While most applications need less than a programs, particularly malloc debuggers, may consume lots of them, e.g., up to one or two maps per allocation. @@ -22,7 +22,7 @@ Signed-off-by: Alexandre Frade mem_profiling --- a/include/linux/mm.h +++ b/include/linux/mm.h -@@ -192,17 +192,18 @@ static inline void __mm_zero_struct_page +@@ -179,17 +179,18 @@ static inline void __mm_zero_struct_page * * When a program's coredump is generated as ELF format, a section is created * per a vma. In ELF, the number of sections is represented in unsigned short. 
diff --git a/debian/patches/patchset-xanmod/xanmod/0012-XANMOD-mm-vmscan-Set-minimum-amount-of-swapping.patch b/debian/patches/patchset-xanmod/xanmod/0012-XANMOD-mm-vmscan-Reduce-amount-of-swapping.patch similarity index 65% rename from debian/patches/patchset-xanmod/xanmod/0012-XANMOD-mm-vmscan-Set-minimum-amount-of-swapping.patch rename to debian/patches/patchset-xanmod/xanmod/0012-XANMOD-mm-vmscan-Reduce-amount-of-swapping.patch index e593597..2cf5657 100644 --- a/debian/patches/patchset-xanmod/xanmod/0012-XANMOD-mm-vmscan-Set-minimum-amount-of-swapping.patch +++ b/debian/patches/patchset-xanmod/xanmod/0012-XANMOD-mm-vmscan-Reduce-amount-of-swapping.patch @@ -1,7 +1,7 @@ -From 14ff7a682d0936937d6813105484da7b6245aabb Mon Sep 17 00:00:00 2001 +From 7ede458d310744257808696e599b8e9b11333dd0 Mon Sep 17 00:00:00 2001 From: Alexandre Frade -Date: Wed, 14 Aug 2024 18:54:53 +0000 -Subject: XANMOD: mm/vmscan: Set minimum amount of swapping +Date: Fri, 30 May 2025 19:58:58 +0000 +Subject: [PATCH 14/19] XANMOD: mm/vmscan: Reduce amount of swapping Signed-off-by: Alexandre Frade --- @@ -15,7 +15,7 @@ Signed-off-by: Alexandre Frade * From 0 .. MAX_SWAPPINESS. Higher means more swappy. */ -int vm_swappiness = 60; -+int vm_swappiness = 1; ++int vm_swappiness = 10; #ifdef CONFIG_MEMCG diff --git a/debian/patches/patchset-xanmod/xanmod/0013-XANMOD-sched-autogroup-Add-kernel-parameter-and-conf.patch b/debian/patches/patchset-xanmod/xanmod/0013-XANMOD-sched-autogroup-Add-kernel-parameter-and-conf.patch index 5bb0a6d..0c38621 100644 --- a/debian/patches/patchset-xanmod/xanmod/0013-XANMOD-sched-autogroup-Add-kernel-parameter-and-conf.patch +++ b/debian/patches/patchset-xanmod/xanmod/0013-XANMOD-sched-autogroup-Add-kernel-parameter-and-conf.patch @@ -1,7 +1,7 @@ -From 2354e3f9a9b181ca2e150c27c57a01049b52b6f0 Mon Sep 17 00:00:00 2001 +From d5b37aa9862773c0cdf95676cc15d97416311ba2 Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Wed, 15 Jun 2022 17:07:29 +0000 -Subject: XANMOD: sched/autogroup: Add kernel parameter and +Subject: [PATCH 15/19] XANMOD: sched/autogroup: Add kernel parameter and config option to enable/disable autogroup feature by default Signed-off-by: Alexandre Frade @@ -13,7 +13,7 @@ Signed-off-by: Alexandre Frade --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -511,6 +511,10 @@ +@@ -507,6 +507,10 @@ Format: (must be >=0) Default: 64 @@ -24,7 +24,7 @@ Signed-off-by: Alexandre Frade bau= [X86_UV] Enable the BAU on SGI UV. The default behavior is to disable the BAU (i.e. bau=0). Format: { "0" | "1" } -@@ -4059,8 +4063,6 @@ +@@ -4086,8 +4090,6 @@ noapictimer [APIC,X86] Don't set up the APIC timer @@ -35,7 +35,7 @@ Signed-off-by: Alexandre Frade no_console_suspend --- a/init/Kconfig +++ b/init/Kconfig -@@ -1375,6 +1375,18 @@ config SCHED_AUTOGROUP +@@ -1374,6 +1374,18 @@ config SCHED_AUTOGROUP desktop applications. Task group autogeneration is currently based upon task session. 
diff --git a/debian/patches/patchset-xanmod/xanmod/0014-XANMOD-cpufreq-tunes-ondemand-and-conservative-gover.patch b/debian/patches/patchset-xanmod/xanmod/0014-XANMOD-cpufreq-tunes-ondemand-and-conservative-gover.patch index 55c2ef4..d3314f3 100644 --- a/debian/patches/patchset-xanmod/xanmod/0014-XANMOD-cpufreq-tunes-ondemand-and-conservative-gover.patch +++ b/debian/patches/patchset-xanmod/xanmod/0014-XANMOD-cpufreq-tunes-ondemand-and-conservative-gover.patch @@ -1,7 +1,7 @@ -From fe02f80f7e47a5ae805393bcba3dbe8c2bd74b0e Mon Sep 17 00:00:00 2001 +From 475f127d322b1fe12a8f486e779ec60cc03220bc Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Tue, 31 Mar 2020 13:32:08 -0300 -Subject: XANMOD: cpufreq: tunes ondemand and conservative +Subject: [PATCH 16/19] XANMOD: cpufreq: tunes ondemand and conservative governor for performance Signed-off-by: Alexandre Frade diff --git a/debian/patches/patchset-xanmod/xanmod/0015-XANMOD-lib-kconfig.debug-disable-default-SYMBOLIC_ER.patch b/debian/patches/patchset-xanmod/xanmod/0015-XANMOD-lib-kconfig.debug-disable-default-SYMBOLIC_ER.patch index 7054920..d62c710 100644 --- a/debian/patches/patchset-xanmod/xanmod/0015-XANMOD-lib-kconfig.debug-disable-default-SYMBOLIC_ER.patch +++ b/debian/patches/patchset-xanmod/xanmod/0015-XANMOD-lib-kconfig.debug-disable-default-SYMBOLIC_ER.patch @@ -1,7 +1,7 @@ -From f2c2f7ec98ca5bfda92d4691af46403348ae0d77 Mon Sep 17 00:00:00 2001 +From ecbc96ba0c56aa4c94c1a4bcb3184cc79fad1d3d Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Mon, 16 Sep 2024 08:09:56 +0000 -Subject: XANMOD: lib/kconfig.debug: disable default +Subject: [PATCH 17/19] XANMOD: lib/kconfig.debug: disable default SYMBOLIC_ERRNAME and DEBUG_BUGVERBOSE Signed-off-by: Alexandre Frade @@ -12,14 +12,14 @@ Signed-off-by: Alexandre Frade --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig -@@ -24,7 +24,6 @@ config BCACHEFS_FS +@@ -23,7 +23,6 @@ config BCACHEFS_FS select XOR_BLOCKS select XXHASH select SRCU - select SYMBOLIC_ERRNAME select MIN_HEAP + select XARRAY_MULTI help - The bcachefs filesystem - a modern, copy on write filesystem, with --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -190,7 +190,7 @@ config DYNAMIC_DEBUG_CORE diff --git a/debian/patches/patchset-xanmod/xanmod/0016-XANMOD-scripts-setlocalversion-remove-tag-for-git-re.patch b/debian/patches/patchset-xanmod/xanmod/0016-XANMOD-scripts-setlocalversion-remove-tag-for-git-re.patch index 963e803..23c5cc4 100644 --- a/debian/patches/patchset-xanmod/xanmod/0016-XANMOD-scripts-setlocalversion-remove-tag-for-git-re.patch +++ b/debian/patches/patchset-xanmod/xanmod/0016-XANMOD-scripts-setlocalversion-remove-tag-for-git-re.patch @@ -1,7 +1,7 @@ -From c706cd7134b55e1f188de6ea23e4b25b0497f18e Mon Sep 17 00:00:00 2001 +From 3606c4614583729c8471c98d171d42ff895b38c4 Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Sun, 29 May 2022 00:57:40 +0000 -Subject: XANMOD: scripts/setlocalversion: remove "+" tag for git +Subject: [PATCH 18/19] XANMOD: scripts/setlocalversion: remove "+" tag for git repo short version Signed-off-by: Alexandre Frade diff --git a/debian/patches/patchset-xanmod/xanmod/0017-XANMOD-scripts-setlocalversion-Move-localversion-fil.patch b/debian/patches/patchset-xanmod/xanmod/0017-XANMOD-scripts-setlocalversion-Move-localversion-fil.patch index 0495cd2..b82a665 100644 --- a/debian/patches/patchset-xanmod/xanmod/0017-XANMOD-scripts-setlocalversion-Move-localversion-fil.patch +++ b/debian/patches/patchset-xanmod/xanmod/0017-XANMOD-scripts-setlocalversion-Move-localversion-fil.patch @@ 
-1,7 +1,7 @@ -From 4c8da54c3f59b0e71408b0c980ffb162fc4bb022 Mon Sep 17 00:00:00 2001 +From 927fa9cd3d3b0a6f65c44d492c263d0669ec4b7e Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Mon, 24 Apr 2023 04:50:34 +0000 -Subject: XANMOD: scripts/setlocalversion: Move localversion* +Subject: [PATCH 19/19] XANMOD: scripts/setlocalversion: Move localversion* files to the end Signed-off-by: Alexandre Frade diff --git a/debian/patches/patchset-zen/fixes/0001-drivers-firmware-skip-simpledrm-if-nvidia-drm.modese.patch b/debian/patches/patchset-zen/fixes/0001-drivers-firmware-skip-simpledrm-if-nvidia-drm.modese.patch index e7811ec..2b0062a 100644 --- a/debian/patches/patchset-zen/fixes/0001-drivers-firmware-skip-simpledrm-if-nvidia-drm.modese.patch +++ b/debian/patches/patchset-zen/fixes/0001-drivers-firmware-skip-simpledrm-if-nvidia-drm.modese.patch @@ -1,4 +1,4 @@ -From 5ac90c5aed97728c8f4f64c02d75334c84a801ef Mon Sep 17 00:00:00 2001 +From bf57be2df6a113afba465bea635444764a7d0f11 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 19 May 2022 14:40:07 +0200 Subject: drivers/firmware: skip simpledrm if nvidia-drm.modeset=1 is set @@ -74,7 +74,7 @@ Cherry-picked-for: https://gitlab.archlinux.org/archlinux/packaging/packages/lin static struct platform_device *pd; static DEFINE_MUTEX(disable_lock); static bool disabled; -@@ -164,7 +180,7 @@ static __init int sysfb_init(void) +@@ -165,7 +181,7 @@ static __init int sysfb_init(void) /* try to create a simple-framebuffer device */ compatible = sysfb_parse_mode(si, &mode); diff --git a/debian/patches/patchset-zen/fixes/0002-Bluetooth-hci_event-Fix-not-using-key-encryption-siz.patch b/debian/patches/patchset-zen/fixes/0002-Bluetooth-hci_event-Fix-not-using-key-encryption-siz.patch deleted file mode 100644 index 7cafc11..0000000 --- a/debian/patches/patchset-zen/fixes/0002-Bluetooth-hci_event-Fix-not-using-key-encryption-siz.patch +++ /dev/null @@ -1,191 +0,0 @@ -From 1d8e5829e40e6547e10a5f479e2a6fea0d412132 Mon Sep 17 00:00:00 2001 -From: Luiz Augusto von Dentz -Date: Wed, 30 Apr 2025 15:07:03 -0400 -Subject: Bluetooth: hci_event: Fix not using key encryption size when its - known - -This fixes the regression introduced by 50c1241e6a8a ("Bluetooth: l2cap: -Check encryption key size on incoming connection") introduced a check for -l2cap_check_enc_key_size which checks for hcon->enc_key_size which may -not be initialized if HCI_OP_READ_ENC_KEY_SIZE is still pending. - -If the key encryption size is known, due previously reading it using -HCI_OP_READ_ENC_KEY_SIZE, then store it as part of link_key/smp_ltk -structures so the next time the encryption is changed their values are -used as conn->enc_key_size thus avoiding the racing against -HCI_OP_READ_ENC_KEY_SIZE. - -Now that the enc_size is stored as part of key the information the code -then attempts to check that there is no downgrade of security if -HCI_OP_READ_ENC_KEY_SIZE returns a value smaller than what has been -previously stored. 
- -Link: https://bugzilla.kernel.org/show_bug.cgi?id=220061 -Link: https://bugzilla.kernel.org/show_bug.cgi?id=220063 -Fixes: 522e9ed157e3 ("Bluetooth: l2cap: Check encryption key size on incoming connection") -Signed-off-by: Luiz Augusto von Dentz -Cherry-picked-for: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/137 ---- - include/net/bluetooth/hci_core.h | 1 + - net/bluetooth/hci_conn.c | 24 +++++++++++ - net/bluetooth/hci_event.c | 73 ++++++++++++++++++-------------- - 3 files changed, 67 insertions(+), 31 deletions(-) - ---- a/include/net/bluetooth/hci_core.h -+++ b/include/net/bluetooth/hci_core.h -@@ -1778,6 +1778,7 @@ struct hci_conn_params *hci_pend_le_acti - void hci_uuids_clear(struct hci_dev *hdev); - - void hci_link_keys_clear(struct hci_dev *hdev); -+u8 *hci_conn_key_enc_size(struct hci_conn *conn); - struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr); - struct link_key *hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, - bdaddr_t *bdaddr, u8 *val, u8 type, ---- a/net/bluetooth/hci_conn.c -+++ b/net/bluetooth/hci_conn.c -@@ -2897,3 +2897,27 @@ int hci_abort_conn(struct hci_conn *conn - */ - return hci_cmd_sync_run_once(hdev, abort_conn_sync, conn, NULL); - } -+ -+u8 *hci_conn_key_enc_size(struct hci_conn *conn) -+{ -+ if (conn->type == ACL_LINK) { -+ struct link_key *key; -+ -+ key = hci_find_link_key(conn->hdev, &conn->dst); -+ if (!key) -+ return NULL; -+ -+ return &key->pin_len; -+ } else if (conn->type == LE_LINK) { -+ struct smp_ltk *ltk; -+ -+ ltk = hci_find_ltk(conn->hdev, &conn->dst, conn->dst_type, -+ conn->role); -+ if (!ltk) -+ return NULL; -+ -+ return <k->enc_size; -+ } -+ -+ return NULL; -+} ---- a/net/bluetooth/hci_event.c -+++ b/net/bluetooth/hci_event.c -@@ -739,10 +739,17 @@ static u8 hci_cc_read_enc_key_size(struc - handle); - conn->enc_key_size = 0; - } else { -+ u8 *key_enc_size = hci_conn_key_enc_size(conn); -+ - conn->enc_key_size = rp->key_size; - status = 0; - -- if (conn->enc_key_size < hdev->min_enc_key_size) { -+ /* Attempt to check if the key size is too small or if it has -+ * been downgraded from the last time it was stored as part of -+ * the link_key. 
-+ */ -+ if (conn->enc_key_size < hdev->min_enc_key_size || -+ (key_enc_size && conn->enc_key_size < *key_enc_size)) { - /* As slave role, the conn->state has been set to - * BT_CONNECTED and l2cap conn req might not be received - * yet, at this moment the l2cap layer almost does -@@ -755,6 +762,10 @@ static u8 hci_cc_read_enc_key_size(struc - clear_bit(HCI_CONN_ENCRYPT, &conn->flags); - clear_bit(HCI_CONN_AES_CCM, &conn->flags); - } -+ -+ /* Update the key encryption size with the connection one */ -+ if (key_enc_size && *key_enc_size != conn->enc_key_size) -+ *key_enc_size = conn->enc_key_size; - } - - hci_encrypt_cfm(conn, status); -@@ -3065,6 +3076,34 @@ static void hci_inquiry_result_evt(struc - hci_dev_unlock(hdev); - } - -+static int hci_read_enc_key_size(struct hci_dev *hdev, struct hci_conn *conn) -+{ -+ struct hci_cp_read_enc_key_size cp; -+ u8 *key_enc_size = hci_conn_key_enc_size(conn); -+ -+ if (!read_key_size_capable(hdev)) { -+ conn->enc_key_size = HCI_LINK_KEY_SIZE; -+ return -EOPNOTSUPP; -+ } -+ -+ bt_dev_dbg(hdev, "hcon %p", conn); -+ -+ memset(&cp, 0, sizeof(cp)); -+ cp.handle = cpu_to_le16(conn->handle); -+ -+ /* If the key enc_size is already known, use it as conn->enc_key_size, -+ * otherwise use hdev->min_enc_key_size so the likes of -+ * l2cap_check_enc_key_size don't fail while waiting for -+ * HCI_OP_READ_ENC_KEY_SIZE response. -+ */ -+ if (key_enc_size && *key_enc_size) -+ conn->enc_key_size = *key_enc_size; -+ else -+ conn->enc_key_size = hdev->min_enc_key_size; -+ -+ return hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, sizeof(cp), &cp); -+} -+ - static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) - { -@@ -3157,23 +3196,11 @@ static void hci_conn_complete_evt(struct - if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) && - ev->link_type == ACL_LINK) { - struct link_key *key; -- struct hci_cp_read_enc_key_size cp; - - key = hci_find_link_key(hdev, &ev->bdaddr); - if (key) { - set_bit(HCI_CONN_ENCRYPT, &conn->flags); -- -- if (!read_key_size_capable(hdev)) { -- conn->enc_key_size = HCI_LINK_KEY_SIZE; -- } else { -- cp.handle = cpu_to_le16(conn->handle); -- if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, -- sizeof(cp), &cp)) { -- bt_dev_err(hdev, "sending read key size failed"); -- conn->enc_key_size = HCI_LINK_KEY_SIZE; -- } -- } -- -+ hci_read_enc_key_size(hdev, conn); - hci_encrypt_cfm(conn, ev->status); - } - } -@@ -3612,24 +3639,8 @@ static void hci_encrypt_change_evt(struc - - /* Try reading the encryption key size for encrypted ACL links */ - if (!ev->status && ev->encrypt && conn->type == ACL_LINK) { -- struct hci_cp_read_enc_key_size cp; -- -- /* Only send HCI_Read_Encryption_Key_Size if the -- * controller really supports it. If it doesn't, assume -- * the default size (16). 
-- */ -- if (!read_key_size_capable(hdev)) { -- conn->enc_key_size = HCI_LINK_KEY_SIZE; -+ if (hci_read_enc_key_size(hdev, conn)) - goto notify; -- } -- -- cp.handle = cpu_to_le16(conn->handle); -- if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, -- sizeof(cp), &cp)) { -- bt_dev_err(hdev, "sending read key size failed"); -- conn->enc_key_size = HCI_LINK_KEY_SIZE; -- goto notify; -- } - - goto unlock; - } diff --git a/debian/patches/patchset-zen/fixes/0002-x86-cpu-Help-users-notice-when-running-old-Intel-mic.patch b/debian/patches/patchset-zen/fixes/0002-x86-cpu-Help-users-notice-when-running-old-Intel-mic.patch new file mode 100644 index 0000000..4c08530 --- /dev/null +++ b/debian/patches/patchset-zen/fixes/0002-x86-cpu-Help-users-notice-when-running-old-Intel-mic.patch @@ -0,0 +1,471 @@ +From a66b7c34e1f618194d288d1b1982af805d5be57f Mon Sep 17 00:00:00 2001 +From: Dave Hansen +Date: Tue, 22 Apr 2025 08:32:47 +0200 +Subject: x86/cpu: Help users notice when running old Intel microcode + +Old microcode is bad for users and for kernel developers. + +For users, it exposes them to known fixed security and/or functional +issues. These obviously rarely result in instant dumpster fires in +every environment. But it is as important to keep your microcode up +to date as it is to keep your kernel up to date. + +Old microcode also makes kernels harder to debug. A developer looking +at an oops need to consider kernel bugs, known CPU issues and unknown +CPU issues as possible causes. If they know the microcode is up to +date, they can mostly eliminate known CPU issues as the cause. + +Make it easier to tell if CPU microcode is out of date. Add a list +of released microcode. If the loaded microcode is older than the +release, tell users in a place that folks can find it: + + /sys/devices/system/cpu/vulnerabilities/old_microcode + +Tell kernel kernel developers about it with the existing taint +flag: + + TAINT_CPU_OUT_OF_SPEC + +== Discussion == + +When a user reports a potential kernel issue, it is very common +to ask them to reproduce the issue on mainline. Running mainline, +they will (independently from the distro) acquire a more up-to-date +microcode version list. If their microcode is old, they will +get a warning about the taint and kernel developers can take that +into consideration when debugging. + +Just like any other entry in "vulnerabilities/", users are free to +make their own assessment of their exposure. + +== Microcode Revision Discussion == + +The microcode versions in the table were generated from the Intel +microcode git repo: + + 8ac9378a8487 ("microcode-20241112 Release") + +which as of this writing lags behind the latest microcode-20250211. + +It can be argued that the versions that the kernel picks to call "old" +should be a revision or two old. Which specific version is picked is +less important to me than picking *a* version and enforcing it. + +This repository contains only microcode versions that Intel has deemed +to be OS-loadable. It is quite possible that the BIOS has loaded a +newer microcode than the latest in this repo. If this happens, the +system is considered to have new microcode, not old. + +Specifically, the sysfs file and taint flag answer the question: + + Is the CPU running on the latest OS-loadable microcode, + or something even later that the BIOS loaded? + +In other words, Intel never publishes an authoritative list of CPUs +and latest microcode revisions. Until it does, this is the best that +Linux can do. 
+ +Also note that the "intel-ucode-defs.h" file is simple, ugly and +has lots of magic numbers. That's on purpose and should allow a +single file to be shared across lots of stable kernel regardless of if +they have the new "VFM" infrastructure or not. It was generated with +a dumb script. + +== FAQ == + +Q: Does this tell me if my system is secure or insecure? +A: No. It only tells you if your microcode was old when the + system booted. + +Q: Should the kernel warn if the microcode list itself is too old? +A: No. New kernels will get new microcode lists, both mainline + and stable. The only way to have an old list is to be running + an old kernel in which case you have bigger problems. + +Q: Is this for security or functional issues? +A: Both. + +Q: If a given microcode update only has functional problems but + no security issues, will it be considered old? +A: Yes. All microcode image versions within a microcode release + are treated identically. Intel appears to make security + updates without disclosing them in the release notes. Thus, + all updates are considered to be security-relevant. + +Q: Who runs old microcode? +A: Anybody with an old distro. This happens all the time inside + of Intel where there are lots of weird systems in labs that + might not be getting regular distro updates and might also + be running rather exotic microcode images. + +Q: If I update my microcode after booting will it stop saying + "Vulnerable"? +A: No. Just like all the other vulnerabilies, you need to + reboot before the kernel will reassess your vulnerability. + +Signed-off-by: Dave Hansen +Signed-off-by: Ingo Molnar +Cc: "Ahmed S. Darwish" +Cc: Andrew Cooper +Cc: Andy Lutomirski +Cc: Brian Gerst +Cc: John Ogness +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: H. Peter Anvin +Cc: Kees Cook +Cc: Linus Torvalds +Link: https://lore.kernel.org/all/20250421195659.CF426C07%40davehans-spike.ostc.intel.com +--- + .../ABI/testing/sysfs-devices-system-cpu | 1 + + Documentation/admin-guide/hw-vuln/index.rst | 1 + + .../admin-guide/hw-vuln/old_microcode.rst | 21 +++ + arch/x86/include/asm/cpufeatures.h | 6 +- + arch/x86/kernel/cpu/bugs.c | 16 ++ + arch/x86/kernel/cpu/common.c | 42 +++++ + .../kernel/cpu/microcode/intel-ucode-defs.h | 150 ++++++++++++++++++ + drivers/base/cpu.c | 3 + + include/linux/cpu.h | 2 + + 9 files changed, 240 insertions(+), 2 deletions(-) + create mode 100644 Documentation/admin-guide/hw-vuln/old_microcode.rst + create mode 100644 arch/x86/kernel/cpu/microcode/intel-ucode-defs.h + +--- a/Documentation/ABI/testing/sysfs-devices-system-cpu ++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu +@@ -517,6 +517,7 @@ What: /sys/devices/system/cpu/vulnerabi + /sys/devices/system/cpu/vulnerabilities/mds + /sys/devices/system/cpu/vulnerabilities/meltdown + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data ++ /sys/devices/system/cpu/vulnerabilities/old_microcode + /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling + /sys/devices/system/cpu/vulnerabilities/retbleed + /sys/devices/system/cpu/vulnerabilities/spec_store_bypass +--- a/Documentation/admin-guide/hw-vuln/index.rst ++++ b/Documentation/admin-guide/hw-vuln/index.rst +@@ -23,4 +23,5 @@ are configurable at compile, boot or run + gather_data_sampling + reg-file-data-sampling + rsb ++ old_microcode + indirect-target-selection +--- /dev/null ++++ b/Documentation/admin-guide/hw-vuln/old_microcode.rst +@@ -0,0 +1,21 @@ ++.. 
SPDX-License-Identifier: GPL-2.0 ++ ++============= ++Old Microcode ++============= ++ ++The kernel keeps a table of released microcode. Systems that had ++microcode older than this at boot will say "Vulnerable". This means ++that the system was vulnerable to some known CPU issue. It could be ++security or functional, the kernel does not know or care. ++ ++You should update the CPU microcode to mitigate any exposure. This is ++usually accomplished by updating the files in ++/lib/firmware/intel-ucode/ via normal distribution updates. Intel also ++distributes these files in a github repo: ++ ++ https://github.com/intel/Intel-Linux-Processor-Microcode-Data-Files.git ++ ++Just like all the other hardware vulnerabilities, exposure is ++determined at boot. Runtime microcode updates do not change the status ++of this vulnerability. +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -534,6 +534,8 @@ + #define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ + #define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ + #define X86_BUG_SPECTRE_V2_USER X86_BUG(1*32 + 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ +-#define X86_BUG_ITS X86_BUG(1*32 + 6) /* "its" CPU is affected by Indirect Target Selection */ +-#define X86_BUG_ITS_NATIVE_ONLY X86_BUG(1*32 + 7) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ ++#define X86_BUG_OLD_MICROCODE X86_BUG(1*32 + 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */ ++#define X86_BUG_ITS X86_BUG(1*32 + 7) /* "its" CPU is affected by Indirect Target Selection */ ++#define X86_BUG_ITS_NATIVE_ONLY X86_BUG(1*32 + 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ ++ + #endif /* _ASM_X86_CPUFEATURES_H */ +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -2954,6 +2954,14 @@ static ssize_t its_show_state(char *buf) + return sysfs_emit(buf, "%s\n", its_strings[its_mitigation]); + } + ++static ssize_t old_microcode_show_state(char *buf) ++{ ++ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) ++ return sysfs_emit(buf, "Unknown: running under hypervisor"); ++ ++ return sysfs_emit(buf, "Vulnerable\n"); ++} ++ + static char *stibp_state(void) + { + if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && +@@ -3136,6 +3144,9 @@ static ssize_t cpu_show_common(struct de + case X86_BUG_RFDS: + return rfds_show_state(buf); + ++ case X86_BUG_OLD_MICROCODE: ++ return old_microcode_show_state(buf); ++ + case X86_BUG_ITS: + return its_show_state(buf); + +@@ -3219,6 +3230,11 @@ ssize_t cpu_show_reg_file_data_sampling( + return cpu_show_common(dev, attr, buf, X86_BUG_RFDS); + } + ++ssize_t cpu_show_old_microcode(struct device *dev, struct device_attribute *attr, char *buf) ++{ ++ return cpu_show_common(dev, attr, buf, X86_BUG_OLD_MICROCODE); ++} ++ + ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf) + { + return cpu_show_common(dev, attr, buf, X86_BUG_ITS); +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1351,10 +1351,52 @@ static bool __init vulnerable_to_its(u64 + return false; + } + ++static struct x86_cpu_id cpu_latest_microcode[] = { ++#include "microcode/intel-ucode-defs.h" ++ {} ++}; ++ ++static bool __init cpu_has_old_microcode(void) ++{ ++ const struct x86_cpu_id *m = x86_match_cpu(cpu_latest_microcode); ++ ++ /* Give unknown CPUs a pass: */ ++ if 
(!m) { ++ /* Intel CPUs should be in the list. Warn if not: */ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ++ pr_info("x86/CPU: Model not found in latest microcode list\n"); ++ return false; ++ } ++ ++ /* ++ * Hosts usually lie to guests with a super high microcode ++ * version. Just ignore what hosts tell guests: ++ */ ++ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) ++ return false; ++ ++ /* Consider all debug microcode to be old: */ ++ if (boot_cpu_data.microcode & BIT(31)) ++ return true; ++ ++ /* Give new microcode a pass: */ ++ if (boot_cpu_data.microcode >= m->driver_data) ++ return false; ++ ++ /* Uh oh, too old: */ ++ return true; ++} ++ + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + { + u64 x86_arch_cap_msr = x86_read_arch_cap_msr(); + ++ if (cpu_has_old_microcode()) { ++ pr_warn("x86/CPU: Running old microcode\n"); ++ setup_force_cpu_bug(X86_BUG_OLD_MICROCODE); ++ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); ++ } ++ + /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ + if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) && + !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO)) +--- /dev/null ++++ b/arch/x86/kernel/cpu/microcode/intel-ucode-defs.h +@@ -0,0 +1,150 @@ ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x03, .steppings = 0x0004, .driver_data = 0x2 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0001, .driver_data = 0x45 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0002, .driver_data = 0x40 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0004, .driver_data = 0x2c }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x05, .steppings = 0x0008, .driver_data = 0x10 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x0001, .driver_data = 0xa }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x0020, .driver_data = 0x3 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x0400, .driver_data = 0xd }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x06, .steppings = 0x2000, .driver_data = 0x7 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x07, .steppings = 0x0002, .driver_data = 0x14 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x07, .steppings = 0x0004, .driver_data = 0x38 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x07, .steppings = 0x0008, .driver_data = 0x2e }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0002, .driver_data = 0x11 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0008, .driver_data = 0x8 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0040, .driver_data = 0xc }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x08, .steppings = 0x0400, .driver_data = 0x5 }, ++{ .flags 
= X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x09, .steppings = 0x0020, .driver_data = 0x47 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0a, .steppings = 0x0001, .driver_data = 0x3 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0a, .steppings = 0x0002, .driver_data = 0x1 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0b, .steppings = 0x0002, .driver_data = 0x1d }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0b, .steppings = 0x0010, .driver_data = 0x2 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0d, .steppings = 0x0040, .driver_data = 0x18 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0e, .steppings = 0x0100, .driver_data = 0x39 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0e, .steppings = 0x1000, .driver_data = 0x59 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0004, .driver_data = 0x5d }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0040, .driver_data = 0xd2 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0080, .driver_data = 0x6b }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0400, .driver_data = 0x95 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x0800, .driver_data = 0xbc }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x0f, .steppings = 0x2000, .driver_data = 0xa4 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x16, .steppings = 0x0002, .driver_data = 0x44 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x17, .steppings = 0x0040, .driver_data = 0x60f }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x17, .steppings = 0x0080, .driver_data = 0x70a }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x17, .steppings = 0x0400, .driver_data = 0xa0b }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1a, .steppings = 0x0010, .driver_data = 0x12 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1a, .steppings = 0x0020, .driver_data = 0x1d }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1c, .steppings = 0x0004, .driver_data = 0x219 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1c, .steppings = 0x0400, .driver_data = 0x107 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1d, .steppings = 0x0002, .driver_data = 0x29 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x1e, .steppings = 0x0020, .driver_data = 0xa }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x25, 
.steppings = 0x0004, .driver_data = 0x11 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x25, .steppings = 0x0020, .driver_data = 0x7 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x26, .steppings = 0x0002, .driver_data = 0x105 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2a, .steppings = 0x0080, .driver_data = 0x2f }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2c, .steppings = 0x0004, .driver_data = 0x1f }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2d, .steppings = 0x0040, .driver_data = 0x621 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2d, .steppings = 0x0080, .driver_data = 0x71a }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2e, .steppings = 0x0040, .driver_data = 0xd }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x2f, .steppings = 0x0004, .driver_data = 0x3b }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x37, .steppings = 0x0100, .driver_data = 0x838 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x37, .steppings = 0x0200, .driver_data = 0x90d }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3a, .steppings = 0x0200, .driver_data = 0x21 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3c, .steppings = 0x0008, .driver_data = 0x28 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3d, .steppings = 0x0010, .driver_data = 0x2f }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3e, .steppings = 0x0010, .driver_data = 0x42e }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3e, .steppings = 0x0040, .driver_data = 0x600 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3e, .steppings = 0x0080, .driver_data = 0x715 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3f, .steppings = 0x0004, .driver_data = 0x49 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x3f, .steppings = 0x0010, .driver_data = 0x1a }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x45, .steppings = 0x0002, .driver_data = 0x26 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x46, .steppings = 0x0002, .driver_data = 0x1c }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x47, .steppings = 0x0002, .driver_data = 0x22 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x4c, .steppings = 0x0008, .driver_data = 0x368 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x4c, .steppings = 0x0010, .driver_data = 0x411 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x4d, .steppings = 0x0100, .driver_data = 0x12d }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, 
.vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x4e, .steppings = 0x0008, .driver_data = 0xf0 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0008, .driver_data = 0x1000191 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0010, .driver_data = 0x2007006 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0020, .driver_data = 0x3000010 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0040, .driver_data = 0x4003605 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0080, .driver_data = 0x5003707 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0800, .driver_data = 0x7002904 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0004, .driver_data = 0x1c }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0008, .driver_data = 0x700001c }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0010, .driver_data = 0xf00001a }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0020, .driver_data = 0xe000015 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5c, .steppings = 0x0004, .driver_data = 0x14 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5c, .steppings = 0x0200, .driver_data = 0x48 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5c, .steppings = 0x0400, .driver_data = 0x28 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5e, .steppings = 0x0008, .driver_data = 0xf0 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5f, .steppings = 0x0002, .driver_data = 0x3e }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x66, .steppings = 0x0008, .driver_data = 0x2a }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x6a, .steppings = 0x0020, .driver_data = 0xc0002f0 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x6a, .steppings = 0x0040, .driver_data = 0xd0003e7 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x6c, .steppings = 0x0002, .driver_data = 0x10002b0 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x7a, .steppings = 0x0002, .driver_data = 0x42 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x7a, .steppings = 0x0100, .driver_data = 0x24 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x7e, .steppings = 0x0020, .driver_data = 0xc6 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8a, .steppings = 0x0002, .driver_data = 0x33 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 
0x6, .model = 0x8c, .steppings = 0x0002, .driver_data = 0xb8 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8c, .steppings = 0x0004, .driver_data = 0x38 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8d, .steppings = 0x0002, .driver_data = 0x52 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x0200, .driver_data = 0xf6 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x0400, .driver_data = 0xf6 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x0800, .driver_data = 0xf6 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x1000, .driver_data = 0xfc }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0100, .driver_data = 0x2c000390 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0080, .driver_data = 0x2b000603 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0040, .driver_data = 0x2c000390 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0020, .driver_data = 0x2c000390 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0010, .driver_data = 0x2c000390 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x96, .steppings = 0x0002, .driver_data = 0x1a }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x97, .steppings = 0x0004, .driver_data = 0x37 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x97, .steppings = 0x0020, .driver_data = 0x37 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbf, .steppings = 0x0004, .driver_data = 0x37 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbf, .steppings = 0x0020, .driver_data = 0x37 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9a, .steppings = 0x0008, .driver_data = 0x435 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9a, .steppings = 0x0010, .driver_data = 0x435 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9c, .steppings = 0x0001, .driver_data = 0x24000026 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x0200, .driver_data = 0xf8 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x0400, .driver_data = 0xf8 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x0800, .driver_data = 0xf6 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x1000, .driver_data = 0xf8 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x2000, .driver_data = 
0x100 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa5, .steppings = 0x0004, .driver_data = 0xfc }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa5, .steppings = 0x0008, .driver_data = 0xfc }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa5, .steppings = 0x0020, .driver_data = 0xfc }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa6, .steppings = 0x0001, .driver_data = 0xfe }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa6, .steppings = 0x0002, .driver_data = 0xfc }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa7, .steppings = 0x0002, .driver_data = 0x62 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xaa, .steppings = 0x0010, .driver_data = 0x20 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xb7, .steppings = 0x0002, .driver_data = 0x12b }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xba, .steppings = 0x0004, .driver_data = 0x4123 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xba, .steppings = 0x0008, .driver_data = 0x4123 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xba, .steppings = 0x0100, .driver_data = 0x4123 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbe, .steppings = 0x0001, .driver_data = 0x1a }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xcf, .steppings = 0x0004, .driver_data = 0x21000283 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xcf, .steppings = 0x0002, .driver_data = 0x21000283 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x00, .steppings = 0x0080, .driver_data = 0x12 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x00, .steppings = 0x0400, .driver_data = 0x15 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x01, .steppings = 0x0004, .driver_data = 0x2e }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0010, .driver_data = 0x21 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0020, .driver_data = 0x2c }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0040, .driver_data = 0x10 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0080, .driver_data = 0x39 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x02, .steppings = 0x0200, .driver_data = 0x2f }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x03, .steppings = 0x0004, .driver_data = 0xa }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x03, .steppings = 0x0008, .driver_data = 0xc }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, 
.family = 0xf, .model = 0x03, .steppings = 0x0010, .driver_data = 0x17 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0002, .driver_data = 0x17 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0008, .driver_data = 0x5 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0010, .driver_data = 0x6 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0080, .driver_data = 0x3 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0100, .driver_data = 0xe }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0200, .driver_data = 0x3 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x04, .steppings = 0x0400, .driver_data = 0x4 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x06, .steppings = 0x0004, .driver_data = 0xf }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x06, .steppings = 0x0010, .driver_data = 0x4 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x06, .steppings = 0x0020, .driver_data = 0x8 }, ++{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x06, .steppings = 0x0100, .driver_data = 0x9 }, +--- a/drivers/base/cpu.c ++++ b/drivers/base/cpu.c +@@ -600,6 +600,7 @@ CPU_SHOW_VULN_FALLBACK(spec_rstack_overf + CPU_SHOW_VULN_FALLBACK(gds); + CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); + CPU_SHOW_VULN_FALLBACK(ghostwrite); ++CPU_SHOW_VULN_FALLBACK(old_microcode); + CPU_SHOW_VULN_FALLBACK(indirect_target_selection); + + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); +@@ -617,6 +618,7 @@ static DEVICE_ATTR(spec_rstack_overflow, + static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); + static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); + static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL); ++static DEVICE_ATTR(old_microcode, 0444, cpu_show_old_microcode, NULL); + static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL); + + static struct attribute *cpu_root_vulnerabilities_attrs[] = { +@@ -635,6 +637,7 @@ static struct attribute *cpu_root_vulner + &dev_attr_gather_data_sampling.attr, + &dev_attr_reg_file_data_sampling.attr, + &dev_attr_ghostwrite.attr, ++ &dev_attr_old_microcode.attr, + &dev_attr_indirect_target_selection.attr, + NULL + }; +--- a/include/linux/cpu.h ++++ b/include/linux/cpu.h +@@ -78,6 +78,8 @@ extern ssize_t cpu_show_gds(struct devic + extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, + struct device_attribute *attr, char *buf); + extern ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf); ++extern ssize_t cpu_show_old_microcode(struct device *dev, ++ struct device_attribute *attr, char *buf); + extern ssize_t cpu_show_indirect_target_selection(struct device *dev, + struct device_attribute *attr, char *buf); + diff --git a/debian/patches/patchset-zen/ksm/0001-mm-expose-per-process-KSM-control-via-syscalls.patch 
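
Note on the hunks just above: together with the per-model minimum-microcode table, they wire a new "old_microcode" entry into the standard CPU vulnerabilities directory in sysfs. A minimal userspace check, a sketch only that assumes the usual /sys/devices/system/cpu/vulnerabilities/ layout and is not part of this series, could look like:

/* Illustrative only: read the "old_microcode" attribute added above. */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/devices/system/cpu/vulnerabilities/old_microcode";
	char status[128];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);	/* attribute absent on kernels without this change */
		return 1;
	}
	if (fgets(status, sizeof(status), f))
		printf("old_microcode: %s", status);	/* e.g. "Not affected" or "Vulnerable" */
	fclose(f);
	return 0;
}
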
b/debian/patches/patchset-zen/ksm/0001-mm-expose-per-process-KSM-control-via-syscalls.patch deleted file mode 100644 index 3382798..0000000 --- a/debian/patches/patchset-zen/ksm/0001-mm-expose-per-process-KSM-control-via-syscalls.patch +++ /dev/null @@ -1,398 +0,0 @@ -From 4ad0ae3b81cd90c0729df9ac5f1ff21f4dad6130 Mon Sep 17 00:00:00 2001 -From: Oleksandr Natalenko -Date: Mon, 30 Sep 2024 08:58:38 +0200 -Subject: mm: expose per-process KSM control via syscalls - -d7597f59d1d3 added a new API to enable per-process KSM control. It -however uses prctl, which doesn't allow controlling KSM from outside of -the current process. - -Hence, expose this API via 3 syscalls: process_ksm_enable, -process_ksm_disable and process_ksm_status. Given sufficient privileges, -auto-KSM can be enable by another process. - -Since these syscalls are not in the upstream kernel, also expose their -numbers under /sys/kernel/process_ksm so that userspace tooling like -uksmd knows how to use them. - -Signed-off-by: Oleksandr Natalenko ---- - arch/alpha/kernel/syscalls/syscall.tbl | 3 + - arch/arm/tools/syscall.tbl | 3 + - arch/m68k/kernel/syscalls/syscall.tbl | 3 + - arch/microblaze/kernel/syscalls/syscall.tbl | 3 + - arch/mips/kernel/syscalls/syscall_n32.tbl | 3 + - arch/mips/kernel/syscalls/syscall_n64.tbl | 3 + - arch/mips/kernel/syscalls/syscall_o32.tbl | 3 + - arch/parisc/kernel/syscalls/syscall.tbl | 3 + - arch/powerpc/kernel/syscalls/syscall.tbl | 3 + - arch/s390/kernel/syscalls/syscall.tbl | 3 + - arch/sh/kernel/syscalls/syscall.tbl | 3 + - arch/sparc/kernel/syscalls/syscall.tbl | 3 + - arch/x86/entry/syscalls/syscall_32.tbl | 3 + - arch/x86/entry/syscalls/syscall_64.tbl | 3 + - arch/xtensa/kernel/syscalls/syscall.tbl | 3 + - include/linux/syscalls.h | 3 + - include/uapi/asm-generic/unistd.h | 9 +- - kernel/sys.c | 138 ++++++++++++++++++ - kernel/sys_ni.c | 3 + - scripts/syscall.tbl | 3 + - .../arch/powerpc/entry/syscalls/syscall.tbl | 3 + - .../perf/arch/s390/entry/syscalls/syscall.tbl | 3 + - 22 files changed, 206 insertions(+), 1 deletion(-) - ---- a/arch/alpha/kernel/syscalls/syscall.tbl -+++ b/arch/alpha/kernel/syscalls/syscall.tbl -@@ -506,3 +506,6 @@ - 574 common getxattrat sys_getxattrat - 575 common listxattrat sys_listxattrat - 576 common removexattrat sys_removexattrat -+577 common process_ksm_enable sys_process_ksm_enable -+578 common process_ksm_disable sys_process_ksm_disable -+579 common process_ksm_status sys_process_ksm_status ---- a/arch/arm/tools/syscall.tbl -+++ b/arch/arm/tools/syscall.tbl -@@ -481,3 +481,6 @@ - 464 common getxattrat sys_getxattrat - 465 common listxattrat sys_listxattrat - 466 common removexattrat sys_removexattrat -+467 common process_ksm_enable sys_process_ksm_enable -+468 common process_ksm_disable sys_process_ksm_disable -+469 common process_ksm_status sys_process_ksm_status ---- a/arch/m68k/kernel/syscalls/syscall.tbl -+++ b/arch/m68k/kernel/syscalls/syscall.tbl -@@ -466,3 +466,6 @@ - 464 common getxattrat sys_getxattrat - 465 common listxattrat sys_listxattrat - 466 common removexattrat sys_removexattrat -+467 common process_ksm_enable sys_process_ksm_enable -+468 common process_ksm_disable sys_process_ksm_disable -+469 common process_ksm_status sys_process_ksm_status ---- a/arch/microblaze/kernel/syscalls/syscall.tbl -+++ b/arch/microblaze/kernel/syscalls/syscall.tbl -@@ -472,3 +472,6 @@ - 464 common getxattrat sys_getxattrat - 465 common listxattrat sys_listxattrat - 466 common removexattrat sys_removexattrat -+467 common process_ksm_enable sys_process_ksm_enable 
-+468 common process_ksm_disable sys_process_ksm_disable -+469 common process_ksm_status sys_process_ksm_status ---- a/arch/mips/kernel/syscalls/syscall_n32.tbl -+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl -@@ -405,3 +405,6 @@ - 464 n32 getxattrat sys_getxattrat - 465 n32 listxattrat sys_listxattrat - 466 n32 removexattrat sys_removexattrat -+467 n32 process_ksm_enable sys_process_ksm_enable -+468 n32 process_ksm_disable sys_process_ksm_disable -+469 n32 process_ksm_status sys_process_ksm_status ---- a/arch/mips/kernel/syscalls/syscall_n64.tbl -+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl -@@ -381,3 +381,6 @@ - 464 n64 getxattrat sys_getxattrat - 465 n64 listxattrat sys_listxattrat - 466 n64 removexattrat sys_removexattrat -+467 n64 process_ksm_enable sys_process_ksm_enable -+468 n64 process_ksm_disable sys_process_ksm_disable -+469 n64 process_ksm_status sys_process_ksm_status ---- a/arch/mips/kernel/syscalls/syscall_o32.tbl -+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl -@@ -454,3 +454,6 @@ - 464 o32 getxattrat sys_getxattrat - 465 o32 listxattrat sys_listxattrat - 466 o32 removexattrat sys_removexattrat -+467 o32 process_ksm_enable sys_process_ksm_enable -+468 o32 process_ksm_disable sys_process_ksm_disable -+469 o32 process_ksm_status sys_process_ksm_status ---- a/arch/parisc/kernel/syscalls/syscall.tbl -+++ b/arch/parisc/kernel/syscalls/syscall.tbl -@@ -465,3 +465,6 @@ - 464 common getxattrat sys_getxattrat - 465 common listxattrat sys_listxattrat - 466 common removexattrat sys_removexattrat -+467 common process_ksm_enable sys_process_ksm_enable -+468 common process_ksm_disable sys_process_ksm_disable -+469 common process_ksm_status sys_process_ksm_status ---- a/arch/powerpc/kernel/syscalls/syscall.tbl -+++ b/arch/powerpc/kernel/syscalls/syscall.tbl -@@ -557,3 +557,6 @@ - 464 common getxattrat sys_getxattrat - 465 common listxattrat sys_listxattrat - 466 common removexattrat sys_removexattrat -+467 common process_ksm_enable sys_process_ksm_enable -+468 common process_ksm_disable sys_process_ksm_disable -+469 common process_ksm_status sys_process_ksm_status ---- a/arch/s390/kernel/syscalls/syscall.tbl -+++ b/arch/s390/kernel/syscalls/syscall.tbl -@@ -469,3 +469,6 @@ - 464 common getxattrat sys_getxattrat sys_getxattrat - 465 common listxattrat sys_listxattrat sys_listxattrat - 466 common removexattrat sys_removexattrat sys_removexattrat -+467 common process_ksm_enable sys_process_ksm_enable sys_process_ksm_enable -+468 common process_ksm_disable sys_process_ksm_disable sys_process_ksm_disable -+469 common process_ksm_status sys_process_ksm_status sys_process_ksm_status ---- a/arch/sh/kernel/syscalls/syscall.tbl -+++ b/arch/sh/kernel/syscalls/syscall.tbl -@@ -470,3 +470,6 @@ - 464 common getxattrat sys_getxattrat - 465 common listxattrat sys_listxattrat - 466 common removexattrat sys_removexattrat -+467 common process_ksm_enable sys_process_ksm_enable -+468 common process_ksm_disable sys_process_ksm_disable -+469 common process_ksm_status sys_process_ksm_status ---- a/arch/sparc/kernel/syscalls/syscall.tbl -+++ b/arch/sparc/kernel/syscalls/syscall.tbl -@@ -512,3 +512,6 @@ - 464 common getxattrat sys_getxattrat - 465 common listxattrat sys_listxattrat - 466 common removexattrat sys_removexattrat -+467 common process_ksm_enable sys_process_ksm_enable -+468 common process_ksm_disable sys_process_ksm_disable -+469 common process_ksm_status sys_process_ksm_status ---- a/arch/x86/entry/syscalls/syscall_32.tbl -+++ b/arch/x86/entry/syscalls/syscall_32.tbl -@@ -472,3 +472,6 @@ - 464 
i386 getxattrat sys_getxattrat - 465 i386 listxattrat sys_listxattrat - 466 i386 removexattrat sys_removexattrat -+467 i386 process_ksm_enable sys_process_ksm_enable -+468 i386 process_ksm_disable sys_process_ksm_disable -+469 i386 process_ksm_status sys_process_ksm_status ---- a/arch/x86/entry/syscalls/syscall_64.tbl -+++ b/arch/x86/entry/syscalls/syscall_64.tbl -@@ -390,6 +390,9 @@ - 464 common getxattrat sys_getxattrat - 465 common listxattrat sys_listxattrat - 466 common removexattrat sys_removexattrat -+467 common process_ksm_enable sys_process_ksm_enable -+468 common process_ksm_disable sys_process_ksm_disable -+469 common process_ksm_status sys_process_ksm_status - - # - # Due to a historical design error, certain syscalls are numbered differently ---- a/arch/xtensa/kernel/syscalls/syscall.tbl -+++ b/arch/xtensa/kernel/syscalls/syscall.tbl -@@ -437,3 +437,6 @@ - 464 common getxattrat sys_getxattrat - 465 common listxattrat sys_listxattrat - 466 common removexattrat sys_removexattrat -+467 common process_ksm_enable sys_process_ksm_enable -+468 common process_ksm_disable sys_process_ksm_disable -+469 common process_ksm_status sys_process_ksm_status ---- a/include/linux/syscalls.h -+++ b/include/linux/syscalls.h -@@ -831,6 +831,9 @@ asmlinkage long sys_madvise(unsigned lon - asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec, - size_t vlen, int behavior, unsigned int flags); - asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags); -+asmlinkage long sys_process_ksm_enable(int pidfd, unsigned int flags); -+asmlinkage long sys_process_ksm_disable(int pidfd, unsigned int flags); -+asmlinkage long sys_process_ksm_status(int pidfd, unsigned int flags); - asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, - unsigned long prot, unsigned long pgoff, - unsigned long flags); ---- a/include/uapi/asm-generic/unistd.h -+++ b/include/uapi/asm-generic/unistd.h -@@ -850,8 +850,15 @@ __SYSCALL(__NR_listxattrat, sys_listxatt - #define __NR_removexattrat 466 - __SYSCALL(__NR_removexattrat, sys_removexattrat) - -+#define __NR_process_ksm_enable 467 -+__SYSCALL(__NR_process_ksm_enable, sys_process_ksm_enable) -+#define __NR_process_ksm_disable 468 -+__SYSCALL(__NR_process_ksm_disable, sys_process_ksm_disable) -+#define __NR_process_ksm_status 469 -+__SYSCALL(__NR_process_ksm_status, sys_process_ksm_status) -+ - #undef __NR_syscalls --#define __NR_syscalls 467 -+#define __NR_syscalls 470 - - /* - * 32 bit systems traditionally used different ---- a/kernel/sys.c -+++ b/kernel/sys.c -@@ -2819,6 +2819,144 @@ SYSCALL_DEFINE5(prctl, int, option, unsi - return error; - } - -+#ifdef CONFIG_KSM -+enum pkc_action { -+ PKSM_ENABLE = 0, -+ PKSM_DISABLE, -+ PKSM_STATUS, -+}; -+ -+static long do_process_ksm_control(int pidfd, enum pkc_action action) -+{ -+ long ret; -+ struct task_struct *task; -+ struct mm_struct *mm; -+ unsigned int f_flags; -+ -+ task = pidfd_get_task(pidfd, &f_flags); -+ if (IS_ERR(task)) { -+ ret = PTR_ERR(task); -+ goto out; -+ } -+ -+ /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */ -+ mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); -+ if (IS_ERR_OR_NULL(mm)) { -+ ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH; -+ goto release_task; -+ } -+ -+ /* Require CAP_SYS_NICE for influencing process performance. 
*/ -+ if (!capable(CAP_SYS_NICE)) { -+ ret = -EPERM; -+ goto release_mm; -+ } -+ -+ if (mmap_write_lock_killable(mm)) { -+ ret = -EINTR; -+ goto release_mm; -+ } -+ -+ switch (action) { -+ case PKSM_ENABLE: -+ ret = ksm_enable_merge_any(mm); -+ break; -+ case PKSM_DISABLE: -+ ret = ksm_disable_merge_any(mm); -+ break; -+ case PKSM_STATUS: -+ ret = !!test_bit(MMF_VM_MERGE_ANY, &mm->flags); -+ break; -+ } -+ -+ mmap_write_unlock(mm); -+ -+release_mm: -+ mmput(mm); -+release_task: -+ put_task_struct(task); -+out: -+ return ret; -+} -+#endif /* CONFIG_KSM */ -+ -+SYSCALL_DEFINE2(process_ksm_enable, int, pidfd, unsigned int, flags) -+{ -+#ifdef CONFIG_KSM -+ if (flags != 0) -+ return -EINVAL; -+ -+ return do_process_ksm_control(pidfd, PKSM_ENABLE); -+#else /* CONFIG_KSM */ -+ return -ENOSYS; -+#endif /* CONFIG_KSM */ -+} -+ -+SYSCALL_DEFINE2(process_ksm_disable, int, pidfd, unsigned int, flags) -+{ -+#ifdef CONFIG_KSM -+ if (flags != 0) -+ return -EINVAL; -+ -+ return do_process_ksm_control(pidfd, PKSM_DISABLE); -+#else /* CONFIG_KSM */ -+ return -ENOSYS; -+#endif /* CONFIG_KSM */ -+} -+ -+SYSCALL_DEFINE2(process_ksm_status, int, pidfd, unsigned int, flags) -+{ -+#ifdef CONFIG_KSM -+ if (flags != 0) -+ return -EINVAL; -+ -+ return do_process_ksm_control(pidfd, PKSM_STATUS); -+#else /* CONFIG_KSM */ -+ return -ENOSYS; -+#endif /* CONFIG_KSM */ -+} -+ -+#ifdef CONFIG_KSM -+static ssize_t process_ksm_enable_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%u\n", __NR_process_ksm_enable); -+} -+static struct kobj_attribute process_ksm_enable_attr = __ATTR_RO(process_ksm_enable); -+ -+static ssize_t process_ksm_disable_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%u\n", __NR_process_ksm_disable); -+} -+static struct kobj_attribute process_ksm_disable_attr = __ATTR_RO(process_ksm_disable); -+ -+static ssize_t process_ksm_status_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%u\n", __NR_process_ksm_status); -+} -+static struct kobj_attribute process_ksm_status_attr = __ATTR_RO(process_ksm_status); -+ -+static struct attribute *process_ksm_sysfs_attrs[] = { -+ &process_ksm_enable_attr.attr, -+ &process_ksm_disable_attr.attr, -+ &process_ksm_status_attr.attr, -+ NULL, -+}; -+ -+static const struct attribute_group process_ksm_sysfs_attr_group = { -+ .attrs = process_ksm_sysfs_attrs, -+ .name = "process_ksm", -+}; -+ -+static int __init process_ksm_sysfs_init(void) -+{ -+ return sysfs_create_group(kernel_kobj, &process_ksm_sysfs_attr_group); -+} -+subsys_initcall(process_ksm_sysfs_init); -+#endif /* CONFIG_KSM */ -+ - SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, - struct getcpu_cache __user *, unused) - { ---- a/kernel/sys_ni.c -+++ b/kernel/sys_ni.c -@@ -186,6 +186,9 @@ COND_SYSCALL(mincore); - COND_SYSCALL(madvise); - COND_SYSCALL(process_madvise); - COND_SYSCALL(process_mrelease); -+COND_SYSCALL(process_ksm_enable); -+COND_SYSCALL(process_ksm_disable); -+COND_SYSCALL(process_ksm_status); - COND_SYSCALL(remap_file_pages); - COND_SYSCALL(mbind); - COND_SYSCALL(get_mempolicy); ---- a/scripts/syscall.tbl -+++ b/scripts/syscall.tbl -@@ -407,3 +407,6 @@ - 464 common getxattrat sys_getxattrat - 465 common listxattrat sys_listxattrat - 466 common removexattrat sys_removexattrat -+467 common process_ksm_enable sys_process_ksm_enable -+468 common process_ksm_disable sys_process_ksm_disable -+469 common process_ksm_status 
sys_process_ksm_status ---- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl -+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl -@@ -557,3 +557,6 @@ - 464 common getxattrat sys_getxattrat - 465 common listxattrat sys_listxattrat - 466 common removexattrat sys_removexattrat -+467 common process_ksm_enable sys_process_ksm_enable -+468 common process_ksm_disable sys_process_ksm_disable -+469 common process_ksm_status sys_process_ksm_status ---- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl -+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl -@@ -469,3 +469,6 @@ - 464 common getxattrat sys_getxattrat sys_getxattrat - 465 common listxattrat sys_listxattrat sys_listxattrat - 466 common removexattrat sys_removexattrat sys_removexattrat -+467 common process_ksm_enable sys_process_ksm_enable sys_process_ksm_enable -+468 common process_ksm_disable sys_process_ksm_disable sys_process_ksm_disable -+469 common process_ksm_status sys_process_ksm_status sys_process_ksm_status diff --git a/debian/patches/patchset-zen/sauce/0001-ZEN-Add-VHBA-driver.patch b/debian/patches/patchset-zen/sauce/0001-ZEN-Add-VHBA-driver.patch index 59c1730..2be6130 100644 --- a/debian/patches/patchset-zen/sauce/0001-ZEN-Add-VHBA-driver.patch +++ b/debian/patches/patchset-zen/sauce/0001-ZEN-Add-VHBA-driver.patch @@ -1,24 +1,24 @@ -From 6d141e3121676e9ca50d6465a622b9a5d572219a Mon Sep 17 00:00:00 2001 +From eceae849a8242fcfeec64470f6f4c24fbae0d614 Mon Sep 17 00:00:00 2001 From: "Jan Alexander Steffens (heftig)" Date: Mon, 26 Apr 2021 22:12:46 +0200 Subject: ZEN: Add VHBA driver remote https://github.com/cdemu/cdemu -tag vhba-module-20240917 +tag vhba-module-20250329 --- drivers/scsi/Kconfig | 2 + drivers/scsi/Makefile | 1 + drivers/scsi/vhba/Kconfig | 9 + drivers/scsi/vhba/Makefile | 4 + - drivers/scsi/vhba/vhba.c | 1130 ++++++++++++++++++++++++++++++++++++ - 5 files changed, 1146 insertions(+) + drivers/scsi/vhba/vhba.c | 1132 ++++++++++++++++++++++++++++++++++++ + 5 files changed, 1148 insertions(+) create mode 100644 drivers/scsi/vhba/Kconfig create mode 100644 drivers/scsi/vhba/Makefile create mode 100644 drivers/scsi/vhba/vhba.c --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig -@@ -1522,4 +1522,6 @@ endif # SCSI_LOWLEVEL +@@ -1521,4 +1521,6 @@ endif # SCSI_LOWLEVEL source "drivers/scsi/device_handler/Kconfig" @@ -27,7 +27,7 @@ tag vhba-module-20240917 endmenu --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile -@@ -153,6 +153,7 @@ obj-$(CONFIG_CHR_DEV_SCH) += ch.o +@@ -152,6 +152,7 @@ obj-$(CONFIG_CHR_DEV_SCH) += ch.o obj-$(CONFIG_SCSI_ENCLOSURE) += ses.o obj-$(CONFIG_SCSI_HISI_SAS) += hisi_sas/ @@ -56,7 +56,7 @@ tag vhba-module-20240917 +ccflags-y := -DVHBA_VERSION=\"$(VHBA_VERSION)\" -Werror --- /dev/null +++ b/drivers/scsi/vhba/vhba.c -@@ -0,0 +1,1130 @@ +@@ -0,0 +1,1132 @@ +/* + * vhba.c + * @@ -596,7 +596,9 @@ tag vhba-module-20240917 +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) + .slave_alloc = vhba_slave_alloc, +#endif -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 14, 0) ++ .tag_alloc_policy_rr = true, ++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) + .tag_alloc_policy = BLK_TAG_ALLOC_RR, +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) diff --git a/debian/patches/patchset-zen/sauce/0002-VHBA-fix-building-with-kernel-6.14-rc1.patch b/debian/patches/patchset-zen/sauce/0002-VHBA-fix-building-with-kernel-6.14-rc1.patch deleted file mode 100644 index 2040f9e..0000000 --- 
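
For reference, the per-process KSM interface removed above exposed three pidfd-based syscalls (process_ksm_enable, process_ksm_disable, process_ksm_status) and published their numbers under /sys/kernel/process_ksm, since the numbers were never allocated upstream. The sketch below shows, under those assumptions, roughly how a consumer such as uksmd would have driven it; the pidfd handling and error paths are illustrative, not taken from this series.

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/syscall.h>

/* Discover the syscall number at runtime from the sysfs file that the
 * (now removed) patch created, instead of hard-coding it. */
static long read_syscall_nr(const char *name)
{
	char path[128], buf[32];
	long nr = -1;
	FILE *f;

	snprintf(path, sizeof(path), "/sys/kernel/process_ksm/%s", name);
	f = fopen(path, "r");
	if (f) {
		if (fgets(buf, sizeof(buf), f))
			nr = strtol(buf, NULL, 10);
		fclose(f);
	}
	return nr;
}

int main(int argc, char **argv)
{
	long nr = read_syscall_nr("process_ksm_enable");
	int pidfd;

	if (argc < 2 || nr < 0)
		return 1;	/* no target given, or kernel lacks the patch */

	/* pidfd of the target, e.g. obtained earlier via pidfd_open(2) */
	pidfd = atoi(argv[1]);

	/* flags must be 0; caller needs CAP_SYS_NICE plus ptrace-read access */
	return syscall(nr, pidfd, 0) == 0 ? 0 : 1;
}

The disable and status variants only differ in which sysfs entry is read; process_ksm_status reported the current MMF_VM_MERGE_ANY state instead of changing it.
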
a/debian/patches/patchset-zen/sauce/0002-VHBA-fix-building-with-kernel-6.14-rc1.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 1f9910c9a54b424ad0cd415b981986937618c4ec Mon Sep 17 00:00:00 2001 -From: Rok Mandeljc -Date: Mon, 3 Feb 2025 21:05:32 +0100 -Subject: VHBA: fix building with kernel 6.14-rc1 - -Kernel 6.14-rc1 simplified the selection of tag allocation policy. -Instead of enum-based value, a boolean is used, and the corresponding -field in the `scsi_host_template` structure was renamed from -`tag_alloc_policy` to `tag_alloc_policy_rr`. - -See: https://github.com/torvalds/linux/commit/ce32496 ---- - drivers/scsi/vhba/vhba.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - ---- a/drivers/scsi/vhba/vhba.c -+++ b/drivers/scsi/vhba/vhba.c -@@ -537,7 +537,9 @@ static struct scsi_host_template vhba_te - #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) - .slave_alloc = vhba_slave_alloc, - #endif --#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 14, 0) -+ .tag_alloc_policy_rr = true, -+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) - .tag_alloc_policy = BLK_TAG_ALLOC_RR, - #endif - #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) diff --git a/debian/patches/patchset-zen/sauce/0003-ZEN-PCI-Add-Intel-remapped-NVMe-device-support.patch b/debian/patches/patchset-zen/sauce/0002-ZEN-PCI-Add-Intel-remapped-NVMe-device-support.patch similarity index 99% rename from debian/patches/patchset-zen/sauce/0003-ZEN-PCI-Add-Intel-remapped-NVMe-device-support.patch rename to debian/patches/patchset-zen/sauce/0002-ZEN-PCI-Add-Intel-remapped-NVMe-device-support.patch index 6b8c047..55126fe 100644 --- a/debian/patches/patchset-zen/sauce/0003-ZEN-PCI-Add-Intel-remapped-NVMe-device-support.patch +++ b/debian/patches/patchset-zen/sauce/0002-ZEN-PCI-Add-Intel-remapped-NVMe-device-support.patch @@ -1,4 +1,4 @@ -From 02b4d790bb05e24e7408a147f33e4e9ca0b805fa Mon Sep 17 00:00:00 2001 +From e0d21c7f4ea5f33bb4a6076d8ff50ad19431e333 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 4 Jun 2019 14:51:21 +0800 Subject: ZEN: PCI: Add Intel remapped NVMe device support @@ -135,7 +135,7 @@ Contains: } static int ahci_get_irq_vector(struct ata_host *host, int port) -@@ -1909,7 +1902,9 @@ static int ahci_init_one(struct pci_dev +@@ -1912,7 +1905,9 @@ static int ahci_init_one(struct pci_dev return -ENOMEM; /* detect remapped nvme devices */ diff --git a/debian/patches/patchset-zen/sauce/0004-ZEN-Disable-stack-conservation-for-GCC.patch b/debian/patches/patchset-zen/sauce/0003-ZEN-Disable-stack-conservation-for-GCC.patch similarity index 86% rename from debian/patches/patchset-zen/sauce/0004-ZEN-Disable-stack-conservation-for-GCC.patch rename to debian/patches/patchset-zen/sauce/0003-ZEN-Disable-stack-conservation-for-GCC.patch index a483655..4fea33f 100644 --- a/debian/patches/patchset-zen/sauce/0004-ZEN-Disable-stack-conservation-for-GCC.patch +++ b/debian/patches/patchset-zen/sauce/0003-ZEN-Disable-stack-conservation-for-GCC.patch @@ -1,4 +1,4 @@ -From 17190525fdc9c9f73fe22832ab0631e9e1bbad6d Mon Sep 17 00:00:00 2001 +From 490a2fd553b92e5ad5f151994a9bbf953cc000f7 Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Sun, 8 Mar 2020 00:31:35 -0800 Subject: ZEN: Disable stack conservation for GCC @@ -15,7 +15,7 @@ Signed-off-by: Sultan Alsawaf --- a/Makefile +++ b/Makefile -@@ -1073,11 +1073,6 @@ KBUILD_CFLAGS += -fno-strict-overflow +@@ -1076,11 +1076,6 @@ KBUILD_CFLAGS += -fno-strict-overflow # Make sure 
-fstack-check isn't enabled (like gentoo apparently did) KBUILD_CFLAGS += -fno-stack-check diff --git a/debian/patches/patchset-zen/sauce/0005-ZEN-Input-evdev-use-call_rcu-when-detaching-client.patch b/debian/patches/patchset-zen/sauce/0004-ZEN-Input-evdev-use-call_rcu-when-detaching-client.patch similarity index 97% rename from debian/patches/patchset-zen/sauce/0005-ZEN-Input-evdev-use-call_rcu-when-detaching-client.patch rename to debian/patches/patchset-zen/sauce/0004-ZEN-Input-evdev-use-call_rcu-when-detaching-client.patch index 2420336..bda07e6 100644 --- a/debian/patches/patchset-zen/sauce/0005-ZEN-Input-evdev-use-call_rcu-when-detaching-client.patch +++ b/debian/patches/patchset-zen/sauce/0004-ZEN-Input-evdev-use-call_rcu-when-detaching-client.patch @@ -1,4 +1,4 @@ -From 2b801ae725ae05be994d374efdce8fc2e828687f Mon Sep 17 00:00:00 2001 +From 71ce760cd36faae55cc0fefebed49998b5eae864 Mon Sep 17 00:00:00 2001 From: Kenny Levinsen Date: Sun, 27 Dec 2020 14:43:13 +0000 Subject: ZEN: Input: evdev - use call_rcu when detaching client diff --git a/debian/patches/patchset-zen/sauce/0006-ZEN-cpufreq-Remove-schedutil-dependency-on-Intel-AMD.patch b/debian/patches/patchset-zen/sauce/0005-ZEN-cpufreq-Remove-schedutil-dependency-on-Intel-AMD.patch similarity index 94% rename from debian/patches/patchset-zen/sauce/0006-ZEN-cpufreq-Remove-schedutil-dependency-on-Intel-AMD.patch rename to debian/patches/patchset-zen/sauce/0005-ZEN-cpufreq-Remove-schedutil-dependency-on-Intel-AMD.patch index 161728e..0c891ac 100644 --- a/debian/patches/patchset-zen/sauce/0006-ZEN-cpufreq-Remove-schedutil-dependency-on-Intel-AMD.patch +++ b/debian/patches/patchset-zen/sauce/0005-ZEN-cpufreq-Remove-schedutil-dependency-on-Intel-AMD.patch @@ -1,4 +1,4 @@ -From 3777b5340ebf0460e6fb79205b294dd4333c9d8b Mon Sep 17 00:00:00 2001 +From 45cea9e15f2512535f2836ccddcf711f4823a2e1 Mon Sep 17 00:00:00 2001 From: Steven Barrett Date: Mon, 11 Jul 2022 19:10:30 -0500 Subject: ZEN: cpufreq: Remove schedutil dependency on Intel/AMD P-State diff --git a/debian/patches/patchset-zen/sauce/0007-ZEN-intel-pstate-Implement-enable-parameter.patch b/debian/patches/patchset-zen/sauce/0006-ZEN-intel-pstate-Implement-enable-parameter.patch similarity index 93% rename from debian/patches/patchset-zen/sauce/0007-ZEN-intel-pstate-Implement-enable-parameter.patch rename to debian/patches/patchset-zen/sauce/0006-ZEN-intel-pstate-Implement-enable-parameter.patch index b850c5e..b772783 100644 --- a/debian/patches/patchset-zen/sauce/0007-ZEN-intel-pstate-Implement-enable-parameter.patch +++ b/debian/patches/patchset-zen/sauce/0006-ZEN-intel-pstate-Implement-enable-parameter.patch @@ -1,4 +1,4 @@ -From d00df0f150c9d04cd229d42e0af906db3dfb5190 Mon Sep 17 00:00:00 2001 +From c5eb62bb4d6a06a5a95c0da0d41469f22e71556f Mon Sep 17 00:00:00 2001 From: Steven Barrett Date: Wed, 15 Jan 2020 20:43:56 -0600 Subject: ZEN: intel-pstate: Implement "enable" parameter @@ -30,7 +30,7 @@ selection. --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -2300,6 +2300,9 @@ +@@ -2324,6 +2324,9 @@ disable Do not enable intel_pstate as the default scaling driver for the supported processors @@ -42,7 +42,7 @@ selection. 
governors layer of cpufreq and provides it own --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c -@@ -3830,6 +3830,8 @@ static int __init intel_pstate_setup(cha +@@ -3828,6 +3828,8 @@ static int __init intel_pstate_setup(cha if (!strcmp(str, "disable")) no_load = 1; diff --git a/debian/patches/patchset-zen/sauce/0008-ZEN-drm-amdgpu-pm-Allow-override-of-min_power_limit-.patch b/debian/patches/patchset-zen/sauce/0007-ZEN-drm-amdgpu-pm-Allow-override-of-min_power_limit-.patch similarity index 87% rename from debian/patches/patchset-zen/sauce/0008-ZEN-drm-amdgpu-pm-Allow-override-of-min_power_limit-.patch rename to debian/patches/patchset-zen/sauce/0007-ZEN-drm-amdgpu-pm-Allow-override-of-min_power_limit-.patch index 9a76bcd..b740990 100644 --- a/debian/patches/patchset-zen/sauce/0008-ZEN-drm-amdgpu-pm-Allow-override-of-min_power_limit-.patch +++ b/debian/patches/patchset-zen/sauce/0007-ZEN-drm-amdgpu-pm-Allow-override-of-min_power_limit-.patch @@ -1,4 +1,4 @@ -From f03da22e562a7d65a97926a76f61daeef8a1eb0d Mon Sep 17 00:00:00 2001 +From b42cd00b809a2f69bbf5e1d63cb7ff90f5f51410 Mon Sep 17 00:00:00 2001 From: Steven Barrett Date: Fri, 15 Mar 2024 12:36:51 -0500 Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with @@ -13,7 +13,7 @@ Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h -@@ -160,6 +160,7 @@ struct amdgpu_watchdog_timer { +@@ -161,6 +161,7 @@ struct amdgpu_watchdog_timer { */ extern int amdgpu_modeset; extern unsigned int amdgpu_vram_limit; @@ -23,7 +23,7 @@ Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with extern int amdgpu_gtt_size; --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -@@ -139,6 +139,7 @@ enum AMDGPU_DEBUG_MASK { +@@ -143,6 +143,7 @@ enum AMDGPU_DEBUG_MASK { }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -31,7 +31,7 @@ Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with int amdgpu_vis_vram_limit; int amdgpu_gart_size = -1; /* auto */ int amdgpu_gtt_size = -1; /* auto */ -@@ -259,6 +260,15 @@ struct amdgpu_watchdog_timer amdgpu_watc +@@ -263,6 +264,15 @@ struct amdgpu_watchdog_timer amdgpu_watc }; /** @@ -49,7 +49,7 @@ Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with */ --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c -@@ -3180,6 +3180,9 @@ static ssize_t amdgpu_hwmon_show_power_c +@@ -3055,6 +3055,9 @@ static ssize_t amdgpu_hwmon_show_power_c struct device_attribute *attr, char *buf) { @@ -61,7 +61,7 @@ Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c -@@ -2824,7 +2824,10 @@ int smu_get_power_limit(void *handle, +@@ -2854,7 +2854,10 @@ int smu_get_power_limit(void *handle, *limit = smu->max_power_limit; break; case SMU_PPT_LIMIT_MIN: @@ -73,7 +73,7 @@ Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with break; default: return -EINVAL; -@@ -2848,7 +2851,14 @@ static int smu_set_power_limit(void *han +@@ -2878,7 +2881,14 @@ static int smu_set_power_limit(void *han if (smu->ppt_funcs->set_power_limit) return smu->ppt_funcs->set_power_limit(smu, limit_type, limit); diff --git a/debian/patches/patchset-zen/sauce/0009-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch b/debian/patches/patchset-zen/sauce/0008-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch 
similarity index 88% rename from debian/patches/patchset-zen/sauce/0009-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch rename to debian/patches/patchset-zen/sauce/0008-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch index 5535177..b556673 100644 --- a/debian/patches/patchset-zen/sauce/0009-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch +++ b/debian/patches/patchset-zen/sauce/0008-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch @@ -1,4 +1,4 @@ -From 5f93b67c4e2fa81be5cee3edd8ec056407d25f26 Mon Sep 17 00:00:00 2001 +From fe26e658b0a14ba9ab4f800bea6a7a43aae0981e Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Sun, 19 Apr 2020 19:59:18 -0700 Subject: ZEN: mm: Stop kswapd early when nothing's waiting for it to free @@ -43,7 +43,7 @@ Contains: --- a/mm/internal.h +++ b/mm/internal.h -@@ -736,6 +736,7 @@ void post_alloc_hook(struct page *page, +@@ -788,6 +788,7 @@ void post_alloc_hook(struct page *page, extern bool free_pages_prepare(struct page *page, unsigned int order); extern int user_min_free_kbytes; @@ -53,16 +53,16 @@ Contains: nodemask_t *); --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -88,6 +88,8 @@ typedef int __bitwise fpi_t; - */ - #define FPI_TO_TAIL ((__force fpi_t)BIT(1)) +@@ -91,6 +91,8 @@ typedef int __bitwise fpi_t; + /* Free the page without taking locks. Rely on trylock only. */ + #define FPI_TRYLOCK ((__force fpi_t)BIT(2)) +atomic_long_t kswapd_waiters = ATOMIC_LONG_INIT(0); + /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ static DEFINE_MUTEX(pcp_batch_high_lock); #define MIN_PERCPU_PAGELIST_HIGH_FRACTION (8) -@@ -4255,6 +4257,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, u +@@ -4436,6 +4438,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, u unsigned int cpuset_mems_cookie; unsigned int zonelist_iter_cookie; int reserve_flags; @@ -70,7 +70,7 @@ Contains: if (unlikely(nofail)) { /* -@@ -4314,8 +4317,13 @@ restart: +@@ -4495,8 +4498,13 @@ restart: goto nopage; } @@ -85,7 +85,7 @@ Contains: /* * The adjusted alloc_flags might result in immediate success, so try -@@ -4525,9 +4533,12 @@ nopage: +@@ -4711,9 +4719,12 @@ nopage: goto retry; } fail: @@ -102,7 +102,7 @@ Contains: --- a/mm/vmscan.c +++ b/mm/vmscan.c -@@ -6389,7 +6389,7 @@ retry: +@@ -6419,7 +6419,7 @@ retry: return 0; } @@ -111,7 +111,7 @@ Contains: { struct zone *zone; unsigned long pfmemalloc_reserve = 0; -@@ -6418,6 +6418,10 @@ static bool allow_direct_reclaim(pg_data +@@ -6444,6 +6444,10 @@ static bool allow_direct_reclaim(pg_data wmark_ok = free_pages > pfmemalloc_reserve / 2; @@ -122,7 +122,7 @@ Contains: /* kswapd must be awake if processes are being throttled */ if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) { if (READ_ONCE(pgdat->kswapd_highest_zoneidx) > ZONE_NORMAL) -@@ -6483,7 +6487,7 @@ static bool throttle_direct_reclaim(gfp_ +@@ -6509,7 +6513,7 @@ static bool throttle_direct_reclaim(gfp_ /* Throttle based on the first usable node */ pgdat = zone->zone_pgdat; @@ -131,7 +131,7 @@ Contains: goto out; break; } -@@ -6505,11 +6509,14 @@ static bool throttle_direct_reclaim(gfp_ +@@ -6531,11 +6535,14 @@ static bool throttle_direct_reclaim(gfp_ */ if (!(gfp_mask & __GFP_FS)) wait_event_interruptible_timeout(pgdat->pfmemalloc_wait, @@ -148,7 +148,7 @@ Contains: if (fatal_signal_pending(current)) return true; -@@ -7012,14 +7019,14 @@ restart: +@@ -7056,14 +7063,14 @@ restart: * able to safely make forward progress. 
Wake them */ if (waitqueue_active(&pgdat->pfmemalloc_wait) && diff --git a/debian/patches/patchset-zen/sauce/0010-ZEN-ahci-Disable-staggered-spinup-by-default.patch b/debian/patches/patchset-zen/sauce/0009-ZEN-ahci-Disable-staggered-spinup-by-default.patch similarity index 93% rename from debian/patches/patchset-zen/sauce/0010-ZEN-ahci-Disable-staggered-spinup-by-default.patch rename to debian/patches/patchset-zen/sauce/0009-ZEN-ahci-Disable-staggered-spinup-by-default.patch index 815a8c4..7792c36 100644 --- a/debian/patches/patchset-zen/sauce/0010-ZEN-ahci-Disable-staggered-spinup-by-default.patch +++ b/debian/patches/patchset-zen/sauce/0009-ZEN-ahci-Disable-staggered-spinup-by-default.patch @@ -1,4 +1,4 @@ -From 80b06f0f0bba019632e40c11231987a7e996c340 Mon Sep 17 00:00:00 2001 +From bc6ff8d7a55a19fdd6828168cc35cba76f05c133 Mon Sep 17 00:00:00 2001 From: EXtremeExploit Date: Fri, 29 Nov 2024 13:05:27 -0300 Subject: ZEN: ahci: Disable staggered spinup by default diff --git a/debian/patches/patchset-zen/sauce/0011-ZEN-kernel-Kconfig.preempt-Remove-EXPERT-conditional.patch b/debian/patches/patchset-zen/sauce/0010-ZEN-kernel-Kconfig.preempt-Remove-EXPERT-conditional.patch similarity index 91% rename from debian/patches/patchset-zen/sauce/0011-ZEN-kernel-Kconfig.preempt-Remove-EXPERT-conditional.patch rename to debian/patches/patchset-zen/sauce/0010-ZEN-kernel-Kconfig.preempt-Remove-EXPERT-conditional.patch index 4d6533f..0e02e6d 100644 --- a/debian/patches/patchset-zen/sauce/0011-ZEN-kernel-Kconfig.preempt-Remove-EXPERT-conditional.patch +++ b/debian/patches/patchset-zen/sauce/0010-ZEN-kernel-Kconfig.preempt-Remove-EXPERT-conditional.patch @@ -1,4 +1,4 @@ -From ac35b7af0aac6a9eb996962130a99c9af75c8b08 Mon Sep 17 00:00:00 2001 +From 4ae0eb6d9a78f53d796996b17743b7df74a7f43d Mon Sep 17 00:00:00 2001 From: Steven Barrett Date: Sat, 14 Dec 2024 11:23:18 -0600 Subject: ZEN: kernel/Kconfig.preempt: Remove EXPERT conditional on PREEMPT_RT diff --git a/debian/patches/patchset-zen/sauce/0012-ZEN-INTERACTIVE-Base-config-item.patch b/debian/patches/patchset-zen/sauce/0011-ZEN-INTERACTIVE-Base-config-item.patch similarity index 80% rename from debian/patches/patchset-zen/sauce/0012-ZEN-INTERACTIVE-Base-config-item.patch rename to debian/patches/patchset-zen/sauce/0011-ZEN-INTERACTIVE-Base-config-item.patch index 3a0e606..e5b9321 100644 --- a/debian/patches/patchset-zen/sauce/0012-ZEN-INTERACTIVE-Base-config-item.patch +++ b/debian/patches/patchset-zen/sauce/0011-ZEN-INTERACTIVE-Base-config-item.patch @@ -1,4 +1,4 @@ -From 8bf253ea1b48fe101dc0161824b9a7d85f420b84 Mon Sep 17 00:00:00 2001 +From e90e4b57fedca27f5c230f39bd6f67672f1d24ef Mon Sep 17 00:00:00 2001 From: "Jan Alexander Steffens (heftig)" Date: Mon, 27 Jan 2020 18:10:06 +0100 Subject: ZEN: INTERACTIVE: Base config item @@ -9,7 +9,7 @@ Subject: ZEN: INTERACTIVE: Base config item --- a/init/Kconfig +++ b/init/Kconfig -@@ -165,6 +165,12 @@ config THREAD_INFO_IN_TASK +@@ -163,6 +163,12 @@ config THREAD_INFO_IN_TASK menu "General setup" diff --git a/debian/patches/patchset-zen/sauce/0013-ZEN-INTERACTIVE-Use-BFQ-as-the-elevator-for-SQ-devic.patch b/debian/patches/patchset-zen/sauce/0012-ZEN-INTERACTIVE-Use-BFQ-as-the-elevator-for-SQ-devic.patch similarity index 89% rename from debian/patches/patchset-zen/sauce/0013-ZEN-INTERACTIVE-Use-BFQ-as-the-elevator-for-SQ-devic.patch rename to debian/patches/patchset-zen/sauce/0012-ZEN-INTERACTIVE-Use-BFQ-as-the-elevator-for-SQ-devic.patch index 195c9f1..a0c2ed5 100644 --- 
a/debian/patches/patchset-zen/sauce/0013-ZEN-INTERACTIVE-Use-BFQ-as-the-elevator-for-SQ-devic.patch +++ b/debian/patches/patchset-zen/sauce/0012-ZEN-INTERACTIVE-Use-BFQ-as-the-elevator-for-SQ-devic.patch @@ -1,4 +1,4 @@ -From d3b2ab943a1de0838c4bd515dbed45f8f1c3c2cc Mon Sep 17 00:00:00 2001 +From 4da290d83614efedb0eb3b8114070fbddc46677b Mon Sep 17 00:00:00 2001 From: "Jan Alexander Steffens (heftig)" Date: Mon, 27 Jan 2020 18:11:05 +0100 Subject: ZEN: INTERACTIVE: Use BFQ as the elevator for SQ devices @@ -24,7 +24,7 @@ Subject: ZEN: INTERACTIVE: Use BFQ as the elevator for SQ devices /* --- a/init/Kconfig +++ b/init/Kconfig -@@ -171,6 +171,10 @@ config ZEN_INTERACTIVE +@@ -169,6 +169,10 @@ config ZEN_INTERACTIVE help Tunes the kernel for responsiveness at the cost of throughput and power usage. diff --git a/debian/patches/patchset-zen/sauce/0014-ZEN-INTERACTIVE-Use-Kyber-as-the-elevator-for-MQ-dev.patch b/debian/patches/patchset-zen/sauce/0013-ZEN-INTERACTIVE-Use-Kyber-as-the-elevator-for-MQ-dev.patch similarity index 90% rename from debian/patches/patchset-zen/sauce/0014-ZEN-INTERACTIVE-Use-Kyber-as-the-elevator-for-MQ-dev.patch rename to debian/patches/patchset-zen/sauce/0013-ZEN-INTERACTIVE-Use-Kyber-as-the-elevator-for-MQ-dev.patch index 6cf05f0..ef73963 100644 --- a/debian/patches/patchset-zen/sauce/0014-ZEN-INTERACTIVE-Use-Kyber-as-the-elevator-for-MQ-dev.patch +++ b/debian/patches/patchset-zen/sauce/0013-ZEN-INTERACTIVE-Use-Kyber-as-the-elevator-for-MQ-dev.patch @@ -1,4 +1,4 @@ -From d941bedf16b95646be26364f00cf46c6649608a6 Mon Sep 17 00:00:00 2001 +From 1689e70d72aff7c4e26ca326e5e94b2d244d13bd Mon Sep 17 00:00:00 2001 From: "Jan Alexander Steffens (heftig)" Date: Mon, 12 Dec 2022 00:03:03 +0100 Subject: ZEN: INTERACTIVE: Use Kyber as the elevator for MQ devices @@ -26,7 +26,7 @@ Subject: ZEN: INTERACTIVE: Use Kyber as the elevator for MQ devices return elevator_find_get("bfq"); --- a/init/Kconfig +++ b/init/Kconfig -@@ -174,6 +174,7 @@ config ZEN_INTERACTIVE +@@ -172,6 +172,7 @@ config ZEN_INTERACTIVE --- Block Layer ---------------------------------------- Default scheduler for SQ..: mq-deadline -> bfq diff --git a/debian/patches/patchset-zen/sauce/0015-ZEN-INTERACTIVE-Enable-background-reclaim-of-hugepag.patch b/debian/patches/patchset-zen/sauce/0014-ZEN-INTERACTIVE-Enable-background-reclaim-of-hugepag.patch similarity index 95% rename from debian/patches/patchset-zen/sauce/0015-ZEN-INTERACTIVE-Enable-background-reclaim-of-hugepag.patch rename to debian/patches/patchset-zen/sauce/0014-ZEN-INTERACTIVE-Enable-background-reclaim-of-hugepag.patch index e5cefa1..eb8192f 100644 --- a/debian/patches/patchset-zen/sauce/0015-ZEN-INTERACTIVE-Enable-background-reclaim-of-hugepag.patch +++ b/debian/patches/patchset-zen/sauce/0014-ZEN-INTERACTIVE-Enable-background-reclaim-of-hugepag.patch @@ -1,4 +1,4 @@ -From d0ce01e1def080e52770f9a899476bb840807b37 Mon Sep 17 00:00:00 2001 +From 8ae9125b5d9637f6b97d20f71348c3e67322028b Mon Sep 17 00:00:00 2001 From: "Jan Alexander Steffens (heftig)" Date: Mon, 27 Jan 2020 18:21:09 +0100 Subject: ZEN: INTERACTIVE: Enable background reclaim of hugepages @@ -32,7 +32,7 @@ Reasoning and details in the original patch: https://lwn.net/Articles/711248/ --- a/init/Kconfig +++ b/init/Kconfig -@@ -176,6 +176,10 @@ config ZEN_INTERACTIVE +@@ -174,6 +174,10 @@ config ZEN_INTERACTIVE Default scheduler for SQ..: mq-deadline -> bfq Default scheduler for MQ..: none -> kyber diff --git 
a/debian/patches/patchset-zen/sauce/0016-ZEN-INTERACTIVE-Tune-EEVDF-for-interactivity.patch b/debian/patches/patchset-zen/sauce/0015-ZEN-INTERACTIVE-Tune-EEVDF-for-interactivity.patch similarity index 88% rename from debian/patches/patchset-zen/sauce/0016-ZEN-INTERACTIVE-Tune-EEVDF-for-interactivity.patch rename to debian/patches/patchset-zen/sauce/0015-ZEN-INTERACTIVE-Tune-EEVDF-for-interactivity.patch index 8ba2d48..dea865c 100644 --- a/debian/patches/patchset-zen/sauce/0016-ZEN-INTERACTIVE-Tune-EEVDF-for-interactivity.patch +++ b/debian/patches/patchset-zen/sauce/0015-ZEN-INTERACTIVE-Tune-EEVDF-for-interactivity.patch @@ -1,4 +1,4 @@ -From f1fd33efd4b70519ff51b78c62d6fdf7d4f69620 Mon Sep 17 00:00:00 2001 +From 93c14f60cef3fe7aa8d11edcab2d9a994b667087 Mon Sep 17 00:00:00 2001 From: "Jan Alexander Steffens (heftig)" Date: Tue, 31 Oct 2023 19:03:10 +0100 Subject: ZEN: INTERACTIVE: Tune EEVDF for interactivity @@ -42,14 +42,14 @@ caused by rebalancing too many tasks at once. --- a/init/Kconfig +++ b/init/Kconfig -@@ -180,6 +180,13 @@ config ZEN_INTERACTIVE +@@ -178,6 +178,13 @@ config ZEN_INTERACTIVE Background-reclaim hugepages...: no -> yes + --- EEVDF CPU Scheduler -------------------------------- + -+ Minimal granularity............: 0.75 -> 0.4 ms -+ Migration cost.................: 0.5 -> 0.25 ms ++ Minimal granularity............: 0.7 -> 0.4 ms ++ Migration cost.................: 0.5 -> 0.3 ms + Bandwidth slice size...........: 5 -> 3 ms + Task rebalancing threshold.....: 32 -> 8 + @@ -71,9 +71,9 @@ caused by rebalancing too many tasks at once. +#endif +#ifdef CONFIG_ZEN_INTERACTIVE -+const_debug unsigned int sysctl_sched_migration_cost = 250000UL; ++__read_mostly unsigned int sysctl_sched_migration_cost = 300000UL; +#else - const_debug unsigned int sysctl_sched_migration_cost = 500000UL; + __read_mostly unsigned int sysctl_sched_migration_cost = 500000UL; +#endif static int __init setup_sched_thermal_decay_shift(char *str) @@ -93,7 +93,7 @@ caused by rebalancing too many tasks at once. /* Restrict the NUMA promotion throughput (MB/s) for each target node. */ --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -2837,7 +2837,7 @@ extern void deactivate_task(struct rq *r +@@ -2790,7 +2790,7 @@ extern void deactivate_task(struct rq *r extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags); diff --git a/debian/patches/patchset-zen/sauce/0017-ZEN-INTERACTIVE-Tune-ondemand-governor-for-interacti.patch b/debian/patches/patchset-zen/sauce/0016-ZEN-INTERACTIVE-Tune-ondemand-governor-for-interacti.patch similarity index 97% rename from debian/patches/patchset-zen/sauce/0017-ZEN-INTERACTIVE-Tune-ondemand-governor-for-interacti.patch rename to debian/patches/patchset-zen/sauce/0016-ZEN-INTERACTIVE-Tune-ondemand-governor-for-interacti.patch index e35f73a..3b3da24 100644 --- a/debian/patches/patchset-zen/sauce/0017-ZEN-INTERACTIVE-Tune-ondemand-governor-for-interacti.patch +++ b/debian/patches/patchset-zen/sauce/0016-ZEN-INTERACTIVE-Tune-ondemand-governor-for-interacti.patch @@ -1,4 +1,4 @@ -From 75f2a8831bd24a35d9853b11dabc06a138c5e445 Mon Sep 17 00:00:00 2001 +From 06211fa595081b33d5de6d818f5d370055075cb2 Mon Sep 17 00:00:00 2001 From: "Jan Alexander Steffens (heftig)" Date: Mon, 27 Jan 2020 18:27:16 +0100 Subject: ZEN: INTERACTIVE: Tune ondemand governor for interactivity @@ -75,7 +75,7 @@ Remove MuQSS cpufreq configuration. 
--- a/init/Kconfig +++ b/init/Kconfig -@@ -187,6 +187,12 @@ config ZEN_INTERACTIVE +@@ -185,6 +185,12 @@ config ZEN_INTERACTIVE Bandwidth slice size...........: 5 -> 3 ms Task rebalancing threshold.....: 32 -> 8 diff --git a/debian/patches/patchset-zen/sauce/0018-ZEN-INTERACTIVE-mm-Disable-unevictable-compaction.patch b/debian/patches/patchset-zen/sauce/0017-ZEN-INTERACTIVE-mm-Disable-unevictable-compaction.patch similarity index 85% rename from debian/patches/patchset-zen/sauce/0018-ZEN-INTERACTIVE-mm-Disable-unevictable-compaction.patch rename to debian/patches/patchset-zen/sauce/0017-ZEN-INTERACTIVE-mm-Disable-unevictable-compaction.patch index 4ab12b2..206b4fc 100644 --- a/debian/patches/patchset-zen/sauce/0018-ZEN-INTERACTIVE-mm-Disable-unevictable-compaction.patch +++ b/debian/patches/patchset-zen/sauce/0017-ZEN-INTERACTIVE-mm-Disable-unevictable-compaction.patch @@ -1,4 +1,4 @@ -From b82d80a4195f179b9c0d0c80f662a7f42ed21ce8 Mon Sep 17 00:00:00 2001 +From a79294bd643ed6b143fc76ddfdeb25caa2121aa4 Mon Sep 17 00:00:00 2001 From: Steven Barrett Date: Sat, 5 Mar 2022 11:37:14 -0600 Subject: ZEN: INTERACTIVE: mm: Disable unevictable compaction @@ -12,7 +12,7 @@ turn it off when CONFIG_ZEN_INTERACTIVE is set as well. --- a/init/Kconfig +++ b/init/Kconfig -@@ -179,6 +179,7 @@ config ZEN_INTERACTIVE +@@ -177,6 +177,7 @@ config ZEN_INTERACTIVE --- Virtual Memory Subsystem --------------------------- Background-reclaim hugepages...: no -> yes @@ -22,7 +22,7 @@ turn it off when CONFIG_ZEN_INTERACTIVE is set as well. --- a/mm/Kconfig +++ b/mm/Kconfig -@@ -691,7 +691,7 @@ config COMPACTION +@@ -654,7 +654,7 @@ config COMPACTION config COMPACT_UNEVICTABLE_DEFAULT int depends on COMPACTION diff --git a/debian/patches/patchset-zen/sauce/0019-ZEN-INTERACTIVE-mm-Disable-watermark-boosting-by-def.patch b/debian/patches/patchset-zen/sauce/0018-ZEN-INTERACTIVE-mm-Disable-watermark-boosting-by-def.patch similarity index 89% rename from debian/patches/patchset-zen/sauce/0019-ZEN-INTERACTIVE-mm-Disable-watermark-boosting-by-def.patch rename to debian/patches/patchset-zen/sauce/0018-ZEN-INTERACTIVE-mm-Disable-watermark-boosting-by-def.patch index 369536c..6d8be96 100644 --- a/debian/patches/patchset-zen/sauce/0019-ZEN-INTERACTIVE-mm-Disable-watermark-boosting-by-def.patch +++ b/debian/patches/patchset-zen/sauce/0018-ZEN-INTERACTIVE-mm-Disable-watermark-boosting-by-def.patch @@ -1,4 +1,4 @@ -From 7227af3e01f9ae5a2bcdc9aa652c973438938eb3 Mon Sep 17 00:00:00 2001 +From d7c684733ae88876fb00899c621dd40e08902f1e Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Sat, 28 Mar 2020 13:06:28 -0700 Subject: ZEN: INTERACTIVE: mm: Disable watermark boosting by default @@ -33,7 +33,7 @@ Signed-off-by: Sultan Alsawaf --- a/init/Kconfig +++ b/init/Kconfig -@@ -180,6 +180,7 @@ config ZEN_INTERACTIVE +@@ -178,6 +178,7 @@ config ZEN_INTERACTIVE Background-reclaim hugepages...: no -> yes Compact unevictable............: yes -> no @@ -43,7 +43,7 @@ Signed-off-by: Sultan Alsawaf --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -273,7 +273,11 @@ const char * const migratetype_names[MIG +@@ -276,7 +276,11 @@ const char * const migratetype_names[MIG int min_free_kbytes = 1024; int user_min_free_kbytes = -1; @@ -53,5 +53,5 @@ Signed-off-by: Sultan Alsawaf static int watermark_boost_factor __read_mostly = 15000; +#endif static int watermark_scale_factor = 10; + int defrag_mode; - /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ diff --git 
a/debian/patches/patchset-zen/sauce/0020-ZEN-INTERACTIVE-mm-Lower-the-non-hugetlbpage-pageblo.patch b/debian/patches/patchset-zen/sauce/0019-ZEN-INTERACTIVE-mm-Lower-the-non-hugetlbpage-pageblo.patch similarity index 95% rename from debian/patches/patchset-zen/sauce/0020-ZEN-INTERACTIVE-mm-Lower-the-non-hugetlbpage-pageblo.patch rename to debian/patches/patchset-zen/sauce/0019-ZEN-INTERACTIVE-mm-Lower-the-non-hugetlbpage-pageblo.patch index 46efc1b..436bbf4 100644 --- a/debian/patches/patchset-zen/sauce/0020-ZEN-INTERACTIVE-mm-Lower-the-non-hugetlbpage-pageblo.patch +++ b/debian/patches/patchset-zen/sauce/0019-ZEN-INTERACTIVE-mm-Lower-the-non-hugetlbpage-pageblo.patch @@ -1,4 +1,4 @@ -From 91187cefc66b9c186a78d7bd996088fc74c66c99 Mon Sep 17 00:00:00 2001 +From faa505ee52add9101fc9701edc9567e7f7a254df Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Wed, 20 Oct 2021 20:50:11 -0700 Subject: ZEN: INTERACTIVE: mm: Lower the non-hugetlbpage pageblock size to @@ -47,7 +47,7 @@ Signed-off-by: Sultan Alsawaf --- a/init/Kconfig +++ b/init/Kconfig -@@ -181,6 +181,7 @@ config ZEN_INTERACTIVE +@@ -179,6 +179,7 @@ config ZEN_INTERACTIVE Background-reclaim hugepages...: no -> yes Compact unevictable............: yes -> no Watermark boost factor.........: 1.5 -> 0 diff --git a/debian/patches/patchset-zen/sauce/0021-ZEN-INTERACTIVE-dm-crypt-Disable-workqueues-for-cryp.patch b/debian/patches/patchset-zen/sauce/0020-ZEN-INTERACTIVE-dm-crypt-Disable-workqueues-for-cryp.patch similarity index 88% rename from debian/patches/patchset-zen/sauce/0021-ZEN-INTERACTIVE-dm-crypt-Disable-workqueues-for-cryp.patch rename to debian/patches/patchset-zen/sauce/0020-ZEN-INTERACTIVE-dm-crypt-Disable-workqueues-for-cryp.patch index 1abf1a2..18697ba 100644 --- a/debian/patches/patchset-zen/sauce/0021-ZEN-INTERACTIVE-dm-crypt-Disable-workqueues-for-cryp.patch +++ b/debian/patches/patchset-zen/sauce/0020-ZEN-INTERACTIVE-dm-crypt-Disable-workqueues-for-cryp.patch @@ -1,4 +1,4 @@ -From 779648709dc797dac595e3007b4c7c3fee254537 Mon Sep 17 00:00:00 2001 +From 8114de0815b1821e66951288e0a14c5a13b68d82 Mon Sep 17 00:00:00 2001 From: Steven Barrett Date: Sat, 21 May 2022 15:15:09 -0500 Subject: ZEN: INTERACTIVE: dm-crypt: Disable workqueues for crypto ops @@ -20,7 +20,7 @@ Fixes: https://github.com/zen-kernel/zen-kernel/issues/282 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c -@@ -3305,6 +3305,11 @@ static int crypt_ctr(struct dm_target *t +@@ -3284,6 +3284,11 @@ static int crypt_ctr(struct dm_target *t goto bad; } @@ -34,7 +34,7 @@ Fixes: https://github.com/zen-kernel/zen-kernel/issues/282 goto bad; --- a/init/Kconfig +++ b/init/Kconfig -@@ -175,6 +175,7 @@ config ZEN_INTERACTIVE +@@ -173,6 +173,7 @@ config ZEN_INTERACTIVE Default scheduler for SQ..: mq-deadline -> bfq Default scheduler for MQ..: none -> kyber diff --git a/debian/patches/patchset-zen/sauce/0022-ZEN-INTERACTIVE-mm-swap-Disable-swap-in-readahead.patch b/debian/patches/patchset-zen/sauce/0021-ZEN-INTERACTIVE-mm-swap-Disable-swap-in-readahead.patch similarity index 84% rename from debian/patches/patchset-zen/sauce/0022-ZEN-INTERACTIVE-mm-swap-Disable-swap-in-readahead.patch rename to debian/patches/patchset-zen/sauce/0021-ZEN-INTERACTIVE-mm-swap-Disable-swap-in-readahead.patch index 86deae9..ee9b53f 100644 --- a/debian/patches/patchset-zen/sauce/0022-ZEN-INTERACTIVE-mm-swap-Disable-swap-in-readahead.patch +++ b/debian/patches/patchset-zen/sauce/0021-ZEN-INTERACTIVE-mm-swap-Disable-swap-in-readahead.patch @@ -1,4 +1,4 @@ -From 
ef87b1cb12134c34eed834315b03c4a6747b5716 Mon Sep 17 00:00:00 2001 +From 1bc3828fcd5966dd94126355e4d02e42caf1407b Mon Sep 17 00:00:00 2001 From: Steven Barrett Date: Mon, 5 Sep 2022 11:35:20 -0500 Subject: ZEN: INTERACTIVE: mm/swap: Disable swap-in readahead @@ -20,7 +20,7 @@ same change so Zen Kernel users benefit. --- a/init/Kconfig +++ b/init/Kconfig -@@ -183,6 +183,7 @@ config ZEN_INTERACTIVE +@@ -181,6 +181,7 @@ config ZEN_INTERACTIVE Compact unevictable............: yes -> no Watermark boost factor.........: 1.5 -> 0 Pageblock order................: 10 -> 3 @@ -30,7 +30,7 @@ same change so Zen Kernel users benefit. --- a/mm/swap.c +++ b/mm/swap.c -@@ -1081,6 +1081,10 @@ void folio_batch_remove_exceptionals(str +@@ -1091,6 +1091,10 @@ static const struct ctl_table swap_sysct */ void __init swap_setup(void) { @@ -41,9 +41,11 @@ same change so Zen Kernel users benefit. unsigned long megs = totalram_pages() >> (20 - PAGE_SHIFT); /* Use a smaller cluster for small-memory machines */ -@@ -1092,4 +1096,5 @@ void __init swap_setup(void) +@@ -1102,6 +1106,7 @@ void __init swap_setup(void) * Right now other parts of the system means that we * _really_ don't want to cluster much more */ +#endif + + register_sysctl_init("vm", swap_sysctl_table); } diff --git a/debian/patches/patchset-zen/sauce/0023-ZEN-INTERACTIVE-Document-PDS-BMQ-configuration.patch b/debian/patches/patchset-zen/sauce/0022-ZEN-INTERACTIVE-Document-PDS-BMQ-configuration.patch similarity index 85% rename from debian/patches/patchset-zen/sauce/0023-ZEN-INTERACTIVE-Document-PDS-BMQ-configuration.patch rename to debian/patches/patchset-zen/sauce/0022-ZEN-INTERACTIVE-Document-PDS-BMQ-configuration.patch index 50c0be2..ffc82a2 100644 --- a/debian/patches/patchset-zen/sauce/0023-ZEN-INTERACTIVE-Document-PDS-BMQ-configuration.patch +++ b/debian/patches/patchset-zen/sauce/0022-ZEN-INTERACTIVE-Document-PDS-BMQ-configuration.patch @@ -1,4 +1,4 @@ -From cb33a6dc022faa07ac1e1cd544567b28a7e9afeb Mon Sep 17 00:00:00 2001 +From 625fb48c0c4fab13b9c2f231c3fe6368a1b78242 Mon Sep 17 00:00:00 2001 From: Steven Barrett Date: Sun, 19 Sep 2021 16:03:36 -0500 Subject: ZEN: INTERACTIVE: Document PDS/BMQ configuration @@ -9,7 +9,7 @@ Subject: ZEN: INTERACTIVE: Document PDS/BMQ configuration --- a/init/Kconfig +++ b/init/Kconfig -@@ -192,6 +192,11 @@ config ZEN_INTERACTIVE +@@ -190,6 +190,11 @@ config ZEN_INTERACTIVE Bandwidth slice size...........: 5 -> 3 ms Task rebalancing threshold.....: 32 -> 8 diff --git a/debian/patches/series b/debian/patches/series index dbabcc3..c647d49 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -28,6 +28,7 @@ debian/linux-perf-remove-remaining-source-filenames-from-executable.patch # Fixes/improvements to firmware loading features/all/drivers-media-dvb-usb-af9005-request_firmware.patch debian/iwlwifi-do-not-request-unreleased-firmware.patch +debian/firmware_loader-log-direct-loading-failures-as-info-for-d-i.patch bugfix/all/radeon-amdgpu-firmware-is-required-for-drm-and-kms-on-r600-onward.patch # Change some defaults for security reasons @@ -68,7 +69,6 @@ features/x86/x86-make-x32-syscall-support-conditional.patch # Miscellaneous bug fixes bugfix/all/disable-some-marvell-phys.patch bugfix/all/fs-add-module_softdep-declarations-for-hard-coded-cr.patch -bugfix/all/documentation-use-relative-source-paths-in-abi-documentation.patch # Miscellaneous features @@ -107,15 +107,14 @@ bugfix/all/libbpf-use-the-standard-fixdep-build-rule.patch ## own patches 
-krd/0001-Revert-objtool-dont-fail-the-kernel-build-on-fatal-errors.patch -krd/0002-established-timeout.patch -krd/0003-local-ports.patch -krd/0004-bridge-group_fwd_mask.patch -krd/0005-certs-genkey.patch +krd/0001-established-timeout.patch +krd/0002-local-ports.patch +krd/0003-bridge-group_fwd_mask.patch +krd/0004-certs-genkey.patch ## 3rd party patches -mixed-arch/0001-ZEN-Add-graysky-s-more-ISA-levels-and-uarches.patch +mixed-arch/0001-graysky2-more-ISA-levels-and-uarches.patch mixed-arch/0002-ZEN-Restore-CONFIG_OPTIMIZE_FOR_PERFORMANCE_O3.patch mixed-arch/0003-krd-adjust-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3.patch mixed-arch/0004-XANMOD-x86-build-Prevent-generating-avx2-and-avx512-.patch @@ -124,55 +123,31 @@ mixed-arch/0006-XANMOD-kbuild-Add-GCC-SMS-based-modulo-scheduling-fl.patch misc-openwrt/0001-mac80211-ignore-AP-power-level-when-tx-power-type-is.patch -patchset-pf/amd-pstate/0001-cpufreq-amd-pstate-Remove-the-redundant-des_perf-cla.patch -patchset-pf/amd-pstate/0002-cpufreq-amd-pstate-Modularize-perf-freq-conversion.patch -patchset-pf/amd-pstate/0003-cpufreq-amd-pstate-Remove-the-unnecessary-cpufreq_up.patch -patchset-pf/amd-pstate/0004-cpufreq-amd-pstate-Use-scope-based-cleanup-for-cpufr.patch -patchset-pf/amd-pstate/0005-cpufreq-amd-pstate-Remove-the-unncecessary-driver_lo.patch -patchset-pf/amd-pstate/0006-cpufreq-amd-pstate-Fix-the-clamping-of-perf-values.patch -patchset-pf/amd-pstate/0007-cpufreq-amd-pstate-Show-a-warning-when-a-CPU-fails-t.patch -patchset-pf/amd-pstate/0008-cpufreq-amd-pstate-Drop-min-and-max-cached-frequenci.patch -patchset-pf/amd-pstate/0009-cpufreq-amd-pstate-Move-perf-values-into-a-union.patch -patchset-pf/amd-pstate/0010-cpufreq-amd-pstate-Overhaul-locking.patch -patchset-pf/amd-pstate/0011-cpufreq-amd-pstate-Drop-cppc_cap1_cached.patch -patchset-pf/amd-pstate/0012-cpufreq-amd-pstate-ut-Use-_free-macro-to-free-put-po.patch -patchset-pf/amd-pstate/0013-cpufreq-amd-pstate-ut-Allow-lowest-nonlinear-and-low.patch -patchset-pf/amd-pstate/0014-cpufreq-amd-pstate-ut-Drop-SUCCESS-and-FAIL-enums.patch -patchset-pf/amd-pstate/0015-cpufreq-amd-pstate-ut-Run-on-all-of-the-correct-CPUs.patch -patchset-pf/amd-pstate/0016-cpufreq-amd-pstate-ut-Adjust-variable-scope.patch -patchset-pf/amd-pstate/0017-cpufreq-amd-pstate-Replace-all-AMD_CPPC_-macros-with.patch -patchset-pf/amd-pstate/0018-cpufreq-amd-pstate-Cache-CPPC-request-in-shared-mem-.patch -patchset-pf/amd-pstate/0019-cpufreq-amd-pstate-Move-all-EPP-tracing-into-_update.patch -patchset-pf/amd-pstate/0020-cpufreq-amd-pstate-Update-cppc_req_cached-for-shared.patch -patchset-pf/amd-pstate/0021-cpufreq-amd-pstate-Drop-debug-statements-for-policy-.patch -patchset-pf/amd-pstate/0022-cpufreq-amd-pstate-Rework-CPPC-enabling.patch -patchset-pf/amd-pstate/0023-cpufreq-amd-pstate-Stop-caching-EPP.patch -patchset-pf/amd-pstate/0024-cpufreq-amd-pstate-Drop-actions-in-amd_pstate_epp_cp.patch -patchset-pf/amd-pstate/0025-cpufreq-amd-pstate-fix-warning-noticed-by-kernel-tes.patch -patchset-pf/amd-pstate/0026-cpufreq-amd-pstate-Fix-min_limit-perf-and-freq-updat.patch - patchset-pf/cpuidle/0001-cpuidle-Prefer-teo-over-menu-governor.patch -patchset-pf/crypto/0001-crypto-x86-aes-xts-make-the-fast-path-64-bit-specifi.patch -patchset-pf/crypto/0002-crypto-x86-aes-ctr-rewrite-AESNI-AVX-optimized-CTR-a.patch +patchset-pf/kbuild/0001-ice-mark-ice_write_prof_mask_reg-as-noinline.patch +patchset-pf/kbuild/0002-wifi-mac80211-mark-copy_mesh_setup-as-noinline.patch 
-patchset-pf/invlpgb/0001-x86-mm-Remove-pv_ops.mmu.tlb_remove_table-call.patch -patchset-pf/invlpgb/0002-x86-mm-Consolidate-full-flush-threshold-decision.patch -patchset-pf/invlpgb/0003-x86-mm-Add-INVLPGB-feature-and-Kconfig-entry.patch -patchset-pf/invlpgb/0004-x86-mm-Add-INVLPGB-support-code.patch -patchset-pf/invlpgb/0005-x86-mm-Use-INVLPGB-for-kernel-TLB-flushes.patch -patchset-pf/invlpgb/0006-x86-mm-Use-broadcast-TLB-flushing-in-page-reclaim.patch -patchset-pf/invlpgb/0007-x86-mm-Add-global-ASID-allocation-helper-functions.patch -patchset-pf/invlpgb/0008-x86-mm-Handle-global-ASID-context-switch-and-TLB-flu.patch -patchset-pf/invlpgb/0009-x86-mm-Add-global-ASID-process-exit-helpers.patch -patchset-pf/invlpgb/0010-x86-mm-Enable-broadcast-TLB-invalidation-for-multi-t.patch -patchset-pf/invlpgb/0011-x86-mm-Enable-AMD-translation-cache-extensions.patch -patchset-pf/invlpgb/0012-x86-mm-Always-set-the-ASID-valid-bit-for-the-INVLPGB.patch -patchset-pf/invlpgb/0013-x86-mm-Only-do-broadcast-flush-from-reclaim-if-pages.patch -patchset-pf/invlpgb/0014-x86-mm-Eliminate-window-where-TLB-flushes-may-be-ina.patch +patchset-pf/nfs/0001-NFSD-unregister-filesystem-in-case-genl_register_fam.patch +patchset-pf/nfs/0002-NFSD-fix-race-between-nfsd-registration-and-exports_.patch +patchset-pf/nfs/0003-nfsd-fix-access-checking-for-NLM-under-XPRTSEC-polic.patch +patchset-pf/nfs/0004-nfsd-nfsd4_spo_must_allow-must-check-this-is-a-v4-co.patch +patchset-pf/nfs/0005-nfsd-Initialize-ssc-before-laundromat_work-to-preven.patch +patchset-pf/nfs/0006-NFSD-Implement-FATTR4_CLONE_BLKSIZE-attribute.patch +patchset-pf/nfs/0007-fs-nfs-read-fix-double-unlock-bug-in-nfs_return_empt.patch +patchset-pf/nfs/0008-NFSv4-Don-t-check-for-OPEN-feature-support-in-v4.1.patch +patchset-pf/nfs/0009-NFS-always-probe-for-LOCALIO-support-asynchronously.patch -patchset-pf/zstd/0001-zstd-import-upstream-v1.5.7.patch -patchset-pf/zstd/0002-lib-zstd-Refactor-intentional-wrap-around-test.patch +patchset-pf/smb/0001-smb-client-add-NULL-check-in-automount_fullpath.patch +patchset-pf/smb/0002-cifs-reset-connections-for-all-channels-when-reconne.patch +patchset-pf/smb/0003-cifs-update-dstaddr-whenever-channel-iface-is-update.patch +patchset-pf/smb/0004-cifs-dns-resolution-is-needed-only-for-primary-chann.patch +patchset-pf/smb/0005-cifs-deal-with-the-channel-loading-lag-while-picking.patch +patchset-pf/smb/0006-cifs-serialize-other-channels-when-query-server-inte.patch +patchset-pf/smb/0007-cifs-do-not-disable-interface-polling-on-failure.patch +patchset-pf/smb/0008-smb-improve-directory-cache-reuse-for-readdir-operat.patch + +patchset-pf/xfs/0001-xfs-don-t-assume-perags-are-initialised-when-trimmin.patch patchset-xanmod/binder/0001-binder-turn-into-module.patch @@ -214,8 +189,6 @@ patchset-xanmod/valve/0004-leds-steamdeck-Add-support-for-Steam-Deck-LED.patch patchset-xanmod/valve/0005-mfd-Add-MFD-core-driver-for-Steam-Deck.patch patchset-xanmod/valve/0006-mfd-steamdeck-Expose-controller-board-power-in-sysfs.patch -patchset-zen/ksm/0001-mm-expose-per-process-KSM-control-via-syscalls.patch - patchset-xanmod/xanmod/0001-kbuild-Re-add-.config-file-required-to-sign-external.patch patchset-xanmod/xanmod/0002-kbuild-Remove-GCC-minimal-function-alignment.patch patchset-xanmod/xanmod/0003-XANMOD-fair-Set-scheduler-tunable-latencies-to-unsca.patch @@ -227,7 +200,7 @@ patchset-xanmod/xanmod/0008-XANMOD-blk-wbt-Set-wbt_default_latency_nsec-to-2msec patchset-xanmod/xanmod/0009-XANMOD-kconfig-add-500Hz-timer-interrupt-kernel-conf.patch 
patchset-xanmod/xanmod/0010-XANMOD-dcache-cache_pressure-50-decreases-the-rate-a.patch patchset-xanmod/xanmod/0011-XANMOD-mm-Raise-max_map_count-default-value.patch -patchset-xanmod/xanmod/0012-XANMOD-mm-vmscan-Set-minimum-amount-of-swapping.patch +patchset-xanmod/xanmod/0012-XANMOD-mm-vmscan-Reduce-amount-of-swapping.patch patchset-xanmod/xanmod/0013-XANMOD-sched-autogroup-Add-kernel-parameter-and-conf.patch patchset-xanmod/xanmod/0014-XANMOD-cpufreq-tunes-ondemand-and-conservative-gover.patch patchset-xanmod/xanmod/0015-XANMOD-lib-kconfig.debug-disable-default-SYMBOLIC_ER.patch @@ -235,35 +208,68 @@ patchset-xanmod/xanmod/0016-XANMOD-scripts-setlocalversion-remove-tag-for-git-re patchset-xanmod/xanmod/0017-XANMOD-scripts-setlocalversion-Move-localversion-fil.patch patchset-zen/sauce/0001-ZEN-Add-VHBA-driver.patch -patchset-zen/sauce/0002-VHBA-fix-building-with-kernel-6.14-rc1.patch -patchset-zen/sauce/0003-ZEN-PCI-Add-Intel-remapped-NVMe-device-support.patch -patchset-zen/sauce/0004-ZEN-Disable-stack-conservation-for-GCC.patch -patchset-zen/sauce/0005-ZEN-Input-evdev-use-call_rcu-when-detaching-client.patch -patchset-zen/sauce/0006-ZEN-cpufreq-Remove-schedutil-dependency-on-Intel-AMD.patch -patchset-zen/sauce/0007-ZEN-intel-pstate-Implement-enable-parameter.patch -patchset-zen/sauce/0008-ZEN-drm-amdgpu-pm-Allow-override-of-min_power_limit-.patch -patchset-zen/sauce/0009-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch -patchset-zen/sauce/0010-ZEN-ahci-Disable-staggered-spinup-by-default.patch -patchset-zen/sauce/0011-ZEN-kernel-Kconfig.preempt-Remove-EXPERT-conditional.patch -patchset-zen/sauce/0012-ZEN-INTERACTIVE-Base-config-item.patch -patchset-zen/sauce/0013-ZEN-INTERACTIVE-Use-BFQ-as-the-elevator-for-SQ-devic.patch -patchset-zen/sauce/0014-ZEN-INTERACTIVE-Use-Kyber-as-the-elevator-for-MQ-dev.patch -patchset-zen/sauce/0015-ZEN-INTERACTIVE-Enable-background-reclaim-of-hugepag.patch -patchset-zen/sauce/0016-ZEN-INTERACTIVE-Tune-EEVDF-for-interactivity.patch -patchset-zen/sauce/0017-ZEN-INTERACTIVE-Tune-ondemand-governor-for-interacti.patch -patchset-zen/sauce/0018-ZEN-INTERACTIVE-mm-Disable-unevictable-compaction.patch -patchset-zen/sauce/0019-ZEN-INTERACTIVE-mm-Disable-watermark-boosting-by-def.patch -patchset-zen/sauce/0020-ZEN-INTERACTIVE-mm-Lower-the-non-hugetlbpage-pageblo.patch -patchset-zen/sauce/0021-ZEN-INTERACTIVE-dm-crypt-Disable-workqueues-for-cryp.patch -patchset-zen/sauce/0022-ZEN-INTERACTIVE-mm-swap-Disable-swap-in-readahead.patch -patchset-zen/sauce/0023-ZEN-INTERACTIVE-Document-PDS-BMQ-configuration.patch +patchset-zen/sauce/0002-ZEN-PCI-Add-Intel-remapped-NVMe-device-support.patch +patchset-zen/sauce/0003-ZEN-Disable-stack-conservation-for-GCC.patch +patchset-zen/sauce/0004-ZEN-Input-evdev-use-call_rcu-when-detaching-client.patch +patchset-zen/sauce/0005-ZEN-cpufreq-Remove-schedutil-dependency-on-Intel-AMD.patch +patchset-zen/sauce/0006-ZEN-intel-pstate-Implement-enable-parameter.patch +patchset-zen/sauce/0007-ZEN-drm-amdgpu-pm-Allow-override-of-min_power_limit-.patch +patchset-zen/sauce/0008-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch +patchset-zen/sauce/0009-ZEN-ahci-Disable-staggered-spinup-by-default.patch +patchset-zen/sauce/0010-ZEN-kernel-Kconfig.preempt-Remove-EXPERT-conditional.patch +patchset-zen/sauce/0011-ZEN-INTERACTIVE-Base-config-item.patch +patchset-zen/sauce/0012-ZEN-INTERACTIVE-Use-BFQ-as-the-elevator-for-SQ-devic.patch +patchset-zen/sauce/0013-ZEN-INTERACTIVE-Use-Kyber-as-the-elevator-for-MQ-dev.patch 
+patchset-zen/sauce/0014-ZEN-INTERACTIVE-Enable-background-reclaim-of-hugepag.patch +patchset-zen/sauce/0015-ZEN-INTERACTIVE-Tune-EEVDF-for-interactivity.patch +patchset-zen/sauce/0016-ZEN-INTERACTIVE-Tune-ondemand-governor-for-interacti.patch +patchset-zen/sauce/0017-ZEN-INTERACTIVE-mm-Disable-unevictable-compaction.patch +patchset-zen/sauce/0018-ZEN-INTERACTIVE-mm-Disable-watermark-boosting-by-def.patch +patchset-zen/sauce/0019-ZEN-INTERACTIVE-mm-Lower-the-non-hugetlbpage-pageblo.patch +patchset-zen/sauce/0020-ZEN-INTERACTIVE-dm-crypt-Disable-workqueues-for-cryp.patch +patchset-zen/sauce/0021-ZEN-INTERACTIVE-mm-swap-Disable-swap-in-readahead.patch +patchset-zen/sauce/0022-ZEN-INTERACTIVE-Document-PDS-BMQ-configuration.patch -patchset-pf/fixes/0001-Kunit-to-check-the-longest-symbol-length.patch -patchset-pf/fixes/0002-x86-tools-Drop-duplicate-unlikely-definition-in-insn.patch -patchset-pf/fixes/0003-ice-mark-ice_write_prof_mask_reg-as-noinline.patch -patchset-pf/fixes/0004-wifi-ath12k-Abort-scan-before-removing-link-interfac.patch -patchset-pf/fixes/0005-Kconfig-switch-CONFIG_SYSFS_SYCALL-default-to-n.patch -patchset-pf/fixes/0006-wifi-mac80211-mark-copy_mesh_setup-as-noinline.patch +patchset-pf/fixes/0001-mm-fix-ratelimit_pages-update-error-in-dirty_ratio_h.patch +patchset-pf/fixes/0002-vgacon-Add-check-for-vc_origin-address-range-in-vgac.patch +patchset-pf/fixes/0003-fbdev-Fix-do_register_framebuffer-to-prevent-null-pt.patch +patchset-pf/fixes/0004-fbdev-Fix-fb_set_var-to-prevent-null-ptr-deref-in-fb.patch +patchset-pf/fixes/0005-anon_inode-use-a-proper-mode-internally.patch +patchset-pf/fixes/0006-anon_inode-explicitly-block-setattr.patch +patchset-pf/fixes/0007-anon_inode-raise-SB_I_NODEV-and-SB_I_NOEXEC.patch +patchset-pf/fixes/0008-fs-add-S_ANON_INODE.patch +patchset-pf/fixes/0009-configfs-Do-not-override-creating-attribute-file-fai.patch +patchset-pf/fixes/0010-Don-t-propagate-mounts-into-detached-trees.patch +patchset-pf/fixes/0011-mm-filemap-gate-dropbehind-invalidate-on-folio-dirty.patch +patchset-pf/fixes/0012-mm-filemap-use-filemap_end_dropbehind-for-read-inval.patch +patchset-pf/fixes/0013-Revert-Disable-FOP_DONTCACHE-for-now-due-to-bugs.patch +patchset-pf/fixes/0014-mm-filemap-unify-read-write-dropbehind-naming.patch +patchset-pf/fixes/0015-mm-filemap-unify-dropbehind-flag-testing-and-clearin.patch +patchset-pf/fixes/0016-mm-khugepaged-fix-race-with-folio-split-free-using-t.patch +patchset-pf/fixes/0017-mm-add-folio_expected_ref_count-for-reference-count-.patch +patchset-pf/fixes/0018-mm-fix-uprobe-pte-be-overwritten-when-expanding-vma.patch +patchset-pf/fixes/0019-mm-hugetlb-unshare-page-tables-during-VMA-split-not-.patch +patchset-pf/fixes/0020-mm-hugetlb-fix-huge_pmd_unshare-vs-GUP-fast-race.patch +patchset-pf/fixes/0021-mm-madvise-handle-madvise_lock-failure-during-race-u.patch +patchset-pf/fixes/0022-video-screen_info-Relocate-framebuffers-behind-PCI-b.patch +patchset-pf/fixes/0023-sysfb-Fix-screen_info-type-check-for-VGA.patch +patchset-pf/fixes/0024-x86-iopl-Cure-TIF_IO_BITMAP-inconsistencies.patch +patchset-pf/fixes/0025-watchdog-fix-watchdog-may-detect-false-positive-of-s.patch +patchset-pf/fixes/0026-sched-rt-Fix-race-in-push_rt_task.patch +patchset-pf/fixes/0027-sched-fair-Adhere-to-place_entity-constraints.patch +patchset-pf/fixes/0028-alloc_tag-handle-module-codetag-load-errors-as-modul.patch +patchset-pf/fixes/0029-svcrdma-Unregister-the-device-if-svc_rdma_accept-fai.patch +patchset-pf/fixes/0030-SUNRPC-Prevent-hang-on-NFS-mount-with-xprtsec-m-tls.patch 
+patchset-pf/fixes/0031-hv_netvsc-fix-potential-deadlock-in-netvsc_vf_setxdp.patch +patchset-pf/fixes/0032-net-clear-the-dst-when-changing-skb-protocol.patch +patchset-pf/fixes/0033-net_sched-sch_sfq-reject-invalid-perturb-period.patch +patchset-pf/fixes/0034-posix-cpu-timers-fix-race-between-handle_posix_cpu_t.patch +patchset-pf/fixes/0035-mm-vma-reset-VMA-iterator-on-commit_merge-OOM-failur.patch +patchset-pf/fixes/0036-mm-close-theoretical-race-where-stale-TLB-entries-co.patch +patchset-pf/fixes/0037-io_uring-kbuf-don-t-truncate-end-buffer-for-multiple.patch +patchset-pf/fixes/0038-nvme-always-punt-polled-uring_cmd-end_io-work-to-tas.patch +patchset-pf/fixes/0039-block-Clear-BIO_EMULATES_ZONE_APPEND-flag-on-BIO-com.patch +patchset-pf/fixes/0040-block-use-plug-request-list-tail-for-one-shot-backme.patch patchset-zen/fixes/0001-drivers-firmware-skip-simpledrm-if-nvidia-drm.modese.patch -patchset-zen/fixes/0002-Bluetooth-hci_event-Fix-not-using-key-encryption-siz.patch +patchset-zen/fixes/0002-x86-cpu-Help-users-notice-when-running-old-Intel-mic.patch