From 8d133d69d2286dd591d3be4fbc284e199297c636 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 29 Apr 2024 20:44:30 +0200 Subject: [PATCH 1/9] aes-xts Signed-off-by: Peter Jung --- arch/x86/Kconfig.assembler | 10 + arch/x86/crypto/Makefile | 3 +- arch/x86/crypto/aes-xts-avx-x86_64.S | 845 +++++++++++++++++++++++++++ arch/x86/crypto/aesni-intel_asm.S | 276 +++------ arch/x86/crypto/aesni-intel_glue.c | 351 ++++++++--- arch/x86/crypto/sha256_ni_asm.S | 253 ++------ 6 files changed, 1261 insertions(+), 477 deletions(-) create mode 100644 arch/x86/crypto/aes-xts-avx-x86_64.S diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler index 8ad41da301e5..59aedf32c4ea 100644 --- a/arch/x86/Kconfig.assembler +++ b/arch/x86/Kconfig.assembler @@ -25,6 +25,16 @@ config AS_GFNI help Supported by binutils >= 2.30 and LLVM integrated assembler +config AS_VAES + def_bool $(as-instr,vaesenc %ymm0$(comma)%ymm1$(comma)%ymm2) + help + Supported by binutils >= 2.30 and LLVM integrated assembler + +config AS_VPCLMULQDQ + def_bool $(as-instr,vpclmulqdq \$0x10$(comma)%ymm0$(comma)%ymm1$(comma)%ymm2) + help + Supported by binutils >= 2.30 and LLVM integrated assembler + config AS_WRUSS def_bool $(as-instr,wrussq %rax$(comma)(%rbx)) help diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 9aa46093c91b..9c5ce5613738 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -48,7 +48,8 @@ chacha-x86_64-$(CONFIG_AS_AVX512) += chacha-avx512vl-x86_64.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o -aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o +aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o \ + aes_ctrby8_avx-x86_64.o aes-xts-avx-x86_64.o obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o sha1-ssse3-y := sha1_avx2_x86_64_asm.o sha1_ssse3_asm.o sha1_ssse3_glue.o diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S new file mode 100644 index 000000000000..48f97b79f7a9 --- /dev/null +++ b/arch/x86/crypto/aes-xts-avx-x86_64.S @@ -0,0 +1,845 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * AES-XTS for modern x86_64 CPUs + * + * Copyright 2024 Google LLC + * + * Author: Eric Biggers + */ + +/* + * This file implements AES-XTS for modern x86_64 CPUs. To handle the + * complexities of coding for x86 SIMD, e.g. where every vector length needs + * different code, it uses a macro to generate several implementations that + * share similar source code but are targeted at different CPUs, listed below: + * + * AES-NI + AVX + * - 128-bit vectors (1 AES block per vector) + * - VEX-coded instructions + * - xmm0-xmm15 + * - This is for older CPUs that lack VAES but do have AVX. + * + * VAES + VPCLMULQDQ + AVX2 + * - 256-bit vectors (2 AES blocks per vector) + * - VEX-coded instructions + * - ymm0-ymm15 + * - This is for CPUs that have VAES but lack AVX512 or AVX10, + * e.g. Intel's Alder Lake and AMD's Zen 3. + * + * VAES + VPCLMULQDQ + AVX10/256 + BMI2 + * - 256-bit vectors (2 AES blocks per vector) + * - EVEX-coded instructions + * - ymm0-ymm31 + * - This is for CPUs that have AVX512 but where using zmm registers causes + * downclocking, and for CPUs that have AVX10/256 but not AVX10/512. + * - By "AVX10/256" we really mean (AVX512BW + AVX512VL) || AVX10/256. + * To avoid confusion with 512-bit, we just write AVX10/256. 
+ * + * VAES + VPCLMULQDQ + AVX10/512 + BMI2 + * - Same as the previous one, but upgrades to 512-bit vectors + * (4 AES blocks per vector) in zmm0-zmm31. + * - This is for CPUs that have good AVX512 or AVX10/512 support. + * + * This file doesn't have an implementation for AES-NI alone (without AVX), as + * the lack of VEX would make all the assembly code different. + * + * When we use VAES, we also use VPCLMULQDQ to parallelize the computation of + * the XTS tweaks. This avoids a bottleneck. Currently there don't seem to be + * any CPUs that support VAES but not VPCLMULQDQ. If that changes, we might + * need to start also providing an implementation using VAES alone. + * + * The AES-XTS implementations in this file support everything required by the + * crypto API, including support for arbitrary input lengths and multi-part + * processing. However, they are most heavily optimized for the common case of + * power-of-2 length inputs that are processed in a single part (disk sectors). + */ + +#include +#include + +.section .rodata +.p2align 4 +.Lgf_poly: + // The low 64 bits of this value represent the polynomial x^7 + x^2 + x + // + 1. It is the value that must be XOR'd into the low 64 bits of the + // tweak each time a 1 is carried out of the high 64 bits. + // + // The high 64 bits of this value is just the internal carry bit that + // exists when there's a carry out of the low 64 bits of the tweak. + .quad 0x87, 1 + + // This table contains constants for vpshufb and vpblendvb, used to + // handle variable byte shifts and blending during ciphertext stealing + // on CPUs that don't support AVX10-style masking. +.Lcts_permute_table: + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 +.text + +// Function parameters +.set KEY, %rdi // Initially points to crypto_aes_ctx, then is + // advanced to point to 7th-from-last round key +.set SRC, %rsi // Pointer to next source data +.set DST, %rdx // Pointer to next destination data +.set LEN, %ecx // Remaining length in bytes +.set LEN8, %cl +.set LEN64, %rcx +.set TWEAK, %r8 // Pointer to next tweak + +// %rax holds the AES key length in bytes. +.set KEYLEN, %eax +.set KEYLEN64, %rax + +// %r9-r11 are available as temporaries. + +.macro _define_Vi i +.if VL == 16 + .set V\i, %xmm\i +.elseif VL == 32 + .set V\i, %ymm\i +.elseif VL == 64 + .set V\i, %zmm\i +.else + .error "Unsupported Vector Length (VL)" +.endif +.endm + +.macro _define_aliases + // Define register aliases V0-V15, or V0-V31 if all 32 SIMD registers + // are available, that map to the xmm, ymm, or zmm registers according + // to the selected Vector Length (VL). + _define_Vi 0 + _define_Vi 1 + _define_Vi 2 + _define_Vi 3 + _define_Vi 4 + _define_Vi 5 + _define_Vi 6 + _define_Vi 7 + _define_Vi 8 + _define_Vi 9 + _define_Vi 10 + _define_Vi 11 + _define_Vi 12 + _define_Vi 13 + _define_Vi 14 + _define_Vi 15 +.if USE_AVX10 + _define_Vi 16 + _define_Vi 17 + _define_Vi 18 + _define_Vi 19 + _define_Vi 20 + _define_Vi 21 + _define_Vi 22 + _define_Vi 23 + _define_Vi 24 + _define_Vi 25 + _define_Vi 26 + _define_Vi 27 + _define_Vi 28 + _define_Vi 29 + _define_Vi 30 + _define_Vi 31 +.endif + + // V0-V3 hold the data blocks during the main loop, or temporary values + // otherwise. V4-V5 hold temporary values. 
+ + // V6-V9 hold XTS tweaks. Each 128-bit lane holds one tweak. + .set TWEAK0_XMM, %xmm6 + .set TWEAK0, V6 + .set TWEAK1_XMM, %xmm7 + .set TWEAK1, V7 + .set TWEAK2, V8 + .set TWEAK3, V9 + + // V10-V13 are used for computing the next values of TWEAK[0-3]. + .set NEXT_TWEAK0, V10 + .set NEXT_TWEAK1, V11 + .set NEXT_TWEAK2, V12 + .set NEXT_TWEAK3, V13 + + // V14 holds the constant from .Lgf_poly, copied to all 128-bit lanes. + .set GF_POLY_XMM, %xmm14 + .set GF_POLY, V14 + + // V15 holds the key for AES "round 0", copied to all 128-bit lanes. + .set KEY0_XMM, %xmm15 + .set KEY0, V15 + + // If 32 SIMD registers are available, then V16-V29 hold the remaining + // AES round keys, copied to all 128-bit lanes. + // + // AES-128, AES-192, and AES-256 use different numbers of round keys. + // To allow handling all three variants efficiently, we align the round + // keys to the *end* of this register range. I.e., AES-128 uses + // KEY5-KEY14, AES-192 uses KEY3-KEY14, and AES-256 uses KEY1-KEY14. + // (All also use KEY0 for the XOR-only "round" at the beginning.) +.if USE_AVX10 + .set KEY1_XMM, %xmm16 + .set KEY1, V16 + .set KEY2_XMM, %xmm17 + .set KEY2, V17 + .set KEY3_XMM, %xmm18 + .set KEY3, V18 + .set KEY4_XMM, %xmm19 + .set KEY4, V19 + .set KEY5_XMM, %xmm20 + .set KEY5, V20 + .set KEY6_XMM, %xmm21 + .set KEY6, V21 + .set KEY7_XMM, %xmm22 + .set KEY7, V22 + .set KEY8_XMM, %xmm23 + .set KEY8, V23 + .set KEY9_XMM, %xmm24 + .set KEY9, V24 + .set KEY10_XMM, %xmm25 + .set KEY10, V25 + .set KEY11_XMM, %xmm26 + .set KEY11, V26 + .set KEY12_XMM, %xmm27 + .set KEY12, V27 + .set KEY13_XMM, %xmm28 + .set KEY13, V28 + .set KEY14_XMM, %xmm29 + .set KEY14, V29 +.endif + // V30-V31 are currently unused. +.endm + +// Move a vector between memory and a register. +.macro _vmovdqu src, dst +.if VL < 64 + vmovdqu \src, \dst +.else + vmovdqu8 \src, \dst +.endif +.endm + +// Broadcast a 128-bit value into a vector. +.macro _vbroadcast128 src, dst +.if VL == 16 && !USE_AVX10 + vmovdqu \src, \dst +.elseif VL == 32 && !USE_AVX10 + vbroadcasti128 \src, \dst +.else + vbroadcasti32x4 \src, \dst +.endif +.endm + +// XOR two vectors together. +.macro _vpxor src1, src2, dst +.if USE_AVX10 + vpxord \src1, \src2, \dst +.else + vpxor \src1, \src2, \dst +.endif +.endm + +// XOR three vectors together. +.macro _xor3 src1, src2, src3_and_dst +.if USE_AVX10 + // vpternlogd with immediate 0x96 is a three-argument XOR. + vpternlogd $0x96, \src1, \src2, \src3_and_dst +.else + vpxor \src1, \src3_and_dst, \src3_and_dst + vpxor \src2, \src3_and_dst, \src3_and_dst +.endif +.endm + +// Given a 128-bit XTS tweak in the xmm register \src, compute the next tweak +// (by multiplying by the polynomial 'x') and write it to \dst. +.macro _next_tweak src, tmp, dst + vpshufd $0x13, \src, \tmp + vpaddq \src, \src, \dst + vpsrad $31, \tmp, \tmp + vpand GF_POLY_XMM, \tmp, \tmp + vpxor \tmp, \dst, \dst +.endm + +// Given the XTS tweak(s) in the vector \src, compute the next vector of +// tweak(s) (by multiplying by the polynomial 'x^(VL/16)') and write it to \dst. +// +// If VL > 16, then there are multiple tweaks, and we use vpclmulqdq to compute +// all tweaks in the vector in parallel. If VL=16, we just do the regular +// computation without vpclmulqdq, as it's the faster method for a single tweak. 
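+// As a plain-C reference for what one multiplication by 'x' means (a sketch,
+// not used by this code): with the tweak viewed as two little-endian 64-bit
+// halves,
+//
+//	carry = hi >> 63;
+//	hi = (hi << 1) | (lo >> 63);
+//	lo = (lo << 1) ^ (carry ? 0x87 : 0);
+//
+// _next_tweak above computes exactly this without branches: vpaddq does both
+// 64-bit left shifts, and the vpshufd/vpsrad/vpand sequence regenerates the
+// two dropped carries as the 0x01 and 0x87 lanes of .Lgf_poly.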
+.macro _next_tweakvec src, tmp1, tmp2, dst +.if VL == 16 + _next_tweak \src, \tmp1, \dst +.else + vpsrlq $64 - VL/16, \src, \tmp1 + vpclmulqdq $0x01, GF_POLY, \tmp1, \tmp2 + vpslldq $8, \tmp1, \tmp1 + vpsllq $VL/16, \src, \dst + _xor3 \tmp1, \tmp2, \dst +.endif +.endm + +// Given the first XTS tweak at (TWEAK), compute the first set of tweaks and +// store them in the vector registers TWEAK0-TWEAK3. Clobbers V0-V5. +.macro _compute_first_set_of_tweaks + vmovdqu (TWEAK), TWEAK0_XMM + _vbroadcast128 .Lgf_poly(%rip), GF_POLY +.if VL == 16 + // With VL=16, multiplying by x serially is fastest. + _next_tweak TWEAK0, %xmm0, TWEAK1 + _next_tweak TWEAK1, %xmm0, TWEAK2 + _next_tweak TWEAK2, %xmm0, TWEAK3 +.else +.if VL == 32 + // Compute the second block of TWEAK0. + _next_tweak TWEAK0_XMM, %xmm0, %xmm1 + vinserti128 $1, %xmm1, TWEAK0, TWEAK0 +.elseif VL == 64 + // Compute the remaining blocks of TWEAK0. + _next_tweak TWEAK0_XMM, %xmm0, %xmm1 + _next_tweak %xmm1, %xmm0, %xmm2 + _next_tweak %xmm2, %xmm0, %xmm3 + vinserti32x4 $1, %xmm1, TWEAK0, TWEAK0 + vinserti32x4 $2, %xmm2, TWEAK0, TWEAK0 + vinserti32x4 $3, %xmm3, TWEAK0, TWEAK0 +.endif + // Compute TWEAK[1-3] from TWEAK0. + vpsrlq $64 - 1*VL/16, TWEAK0, V0 + vpsrlq $64 - 2*VL/16, TWEAK0, V2 + vpsrlq $64 - 3*VL/16, TWEAK0, V4 + vpclmulqdq $0x01, GF_POLY, V0, V1 + vpclmulqdq $0x01, GF_POLY, V2, V3 + vpclmulqdq $0x01, GF_POLY, V4, V5 + vpslldq $8, V0, V0 + vpslldq $8, V2, V2 + vpslldq $8, V4, V4 + vpsllq $1*VL/16, TWEAK0, TWEAK1 + vpsllq $2*VL/16, TWEAK0, TWEAK2 + vpsllq $3*VL/16, TWEAK0, TWEAK3 +.if USE_AVX10 + vpternlogd $0x96, V0, V1, TWEAK1 + vpternlogd $0x96, V2, V3, TWEAK2 + vpternlogd $0x96, V4, V5, TWEAK3 +.else + vpxor V0, TWEAK1, TWEAK1 + vpxor V2, TWEAK2, TWEAK2 + vpxor V4, TWEAK3, TWEAK3 + vpxor V1, TWEAK1, TWEAK1 + vpxor V3, TWEAK2, TWEAK2 + vpxor V5, TWEAK3, TWEAK3 +.endif +.endif +.endm + +// Do one step in computing the next set of tweaks using the method of just +// multiplying by x repeatedly (the same method _next_tweak uses). +.macro _tweak_step_mulx i +.if \i == 0 + .set PREV_TWEAK, TWEAK3 + .set NEXT_TWEAK, NEXT_TWEAK0 +.elseif \i == 5 + .set PREV_TWEAK, NEXT_TWEAK0 + .set NEXT_TWEAK, NEXT_TWEAK1 +.elseif \i == 10 + .set PREV_TWEAK, NEXT_TWEAK1 + .set NEXT_TWEAK, NEXT_TWEAK2 +.elseif \i == 15 + .set PREV_TWEAK, NEXT_TWEAK2 + .set NEXT_TWEAK, NEXT_TWEAK3 +.endif +.if \i >= 0 && \i < 20 && \i % 5 == 0 + vpshufd $0x13, PREV_TWEAK, V5 +.elseif \i >= 0 && \i < 20 && \i % 5 == 1 + vpaddq PREV_TWEAK, PREV_TWEAK, NEXT_TWEAK +.elseif \i >= 0 && \i < 20 && \i % 5 == 2 + vpsrad $31, V5, V5 +.elseif \i >= 0 && \i < 20 && \i % 5 == 3 + vpand GF_POLY, V5, V5 +.elseif \i >= 0 && \i < 20 && \i % 5 == 4 + vpxor V5, NEXT_TWEAK, NEXT_TWEAK +.elseif \i == 1000 + vmovdqa NEXT_TWEAK0, TWEAK0 + vmovdqa NEXT_TWEAK1, TWEAK1 + vmovdqa NEXT_TWEAK2, TWEAK2 + vmovdqa NEXT_TWEAK3, TWEAK3 +.endif +.endm + +// Do one step in computing the next set of tweaks using the VPCLMULQDQ method +// (the same method _next_tweakvec uses for VL > 16). This means multiplying +// each tweak by x^(4*VL/16) independently. Since 4*VL/16 is a multiple of 8 +// when VL > 16 (which it is here), the needed shift amounts are byte-aligned, +// which allows the use of vpsrldq and vpslldq to do 128-bit wide shifts. 
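+// (Concretely: with VL=32 this is a multiplication by x^8, so the macro below
+// uses a 15-byte vpsrldq and a 1-byte vpslldq; with VL=64 it is x^16, using
+// 14-byte and 2-byte shifts.)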
+.macro _tweak_step_pclmul i +.if \i == 0 + vpsrldq $(128 - 4*VL/16) / 8, TWEAK0, NEXT_TWEAK0 +.elseif \i == 2 + vpsrldq $(128 - 4*VL/16) / 8, TWEAK1, NEXT_TWEAK1 +.elseif \i == 4 + vpsrldq $(128 - 4*VL/16) / 8, TWEAK2, NEXT_TWEAK2 +.elseif \i == 6 + vpsrldq $(128 - 4*VL/16) / 8, TWEAK3, NEXT_TWEAK3 +.elseif \i == 8 + vpclmulqdq $0x00, GF_POLY, NEXT_TWEAK0, NEXT_TWEAK0 +.elseif \i == 10 + vpclmulqdq $0x00, GF_POLY, NEXT_TWEAK1, NEXT_TWEAK1 +.elseif \i == 12 + vpclmulqdq $0x00, GF_POLY, NEXT_TWEAK2, NEXT_TWEAK2 +.elseif \i == 14 + vpclmulqdq $0x00, GF_POLY, NEXT_TWEAK3, NEXT_TWEAK3 +.elseif \i == 1000 + vpslldq $(4*VL/16) / 8, TWEAK0, TWEAK0 + vpslldq $(4*VL/16) / 8, TWEAK1, TWEAK1 + vpslldq $(4*VL/16) / 8, TWEAK2, TWEAK2 + vpslldq $(4*VL/16) / 8, TWEAK3, TWEAK3 + _vpxor NEXT_TWEAK0, TWEAK0, TWEAK0 + _vpxor NEXT_TWEAK1, TWEAK1, TWEAK1 + _vpxor NEXT_TWEAK2, TWEAK2, TWEAK2 + _vpxor NEXT_TWEAK3, TWEAK3, TWEAK3 +.endif +.endm + +// _tweak_step does one step of the computation of the next set of tweaks from +// TWEAK[0-3]. To complete all steps, this is invoked with increasing values of +// \i that include at least 0 through 19, then 1000 which signals the last step. +// +// This is used to interleave the computation of the next set of tweaks with the +// AES en/decryptions, which increases performance in some cases. +.macro _tweak_step i +.if VL == 16 + _tweak_step_mulx \i +.else + _tweak_step_pclmul \i +.endif +.endm + +.macro _setup_round_keys enc + + // Select either the encryption round keys or the decryption round keys. +.if \enc + .set OFFS, 0 +.else + .set OFFS, 240 +.endif + + // Load the round key for "round 0". + _vbroadcast128 OFFS(KEY), KEY0 + + // Increment KEY to make it so that 7*16(KEY) is the last round key. + // For AES-128, increment by 3*16, resulting in the 10 round keys (not + // counting the zero-th round key which was just loaded into KEY0) being + // -2*16(KEY) through 7*16(KEY). For AES-192, increment by 5*16 and use + // 12 round keys -4*16(KEY) through 7*16(KEY). For AES-256, increment + // by 7*16 and use 14 round keys -6*16(KEY) through 7*16(KEY). + // + // This rebasing provides two benefits. First, it makes the offset to + // any round key be in the range [-96, 112], fitting in a signed byte. + // This shortens VEX-encoded instructions that access the later round + // keys which otherwise would need 4-byte offsets. Second, it makes it + // easy to do AES-128 and AES-192 by skipping irrelevant rounds at the + // beginning. Skipping rounds at the end doesn't work as well because + // the last round needs different instructions. + // + // An alternative approach would be to roll up all the round loops. We + // don't do that because it isn't compatible with caching the round keys + // in registers which we do when possible (see below), and also because + // it seems unwise to rely *too* heavily on the CPU's branch predictor. + lea OFFS-16(KEY, KEYLEN64, 4), KEY + + // If all 32 SIMD registers are available, cache all the round keys. 
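+// For reference: KEYLEN is 16, 24, or 32, so the lea above advances KEY by
+// OFFS plus 3*16, 5*16, or 7*16 bytes respectively (KEYLEN64*4 - 16).  The
+// "cmp $24, KEYLEN; jl/je" pattern used below and in the round loops then
+// skips the loads and rounds that only the longer key sizes need.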
+.if USE_AVX10 + cmp $24, KEYLEN + jl .Laes128\@ + je .Laes192\@ + _vbroadcast128 -6*16(KEY), KEY1 + _vbroadcast128 -5*16(KEY), KEY2 +.Laes192\@: + _vbroadcast128 -4*16(KEY), KEY3 + _vbroadcast128 -3*16(KEY), KEY4 +.Laes128\@: + _vbroadcast128 -2*16(KEY), KEY5 + _vbroadcast128 -1*16(KEY), KEY6 + _vbroadcast128 0*16(KEY), KEY7 + _vbroadcast128 1*16(KEY), KEY8 + _vbroadcast128 2*16(KEY), KEY9 + _vbroadcast128 3*16(KEY), KEY10 + _vbroadcast128 4*16(KEY), KEY11 + _vbroadcast128 5*16(KEY), KEY12 + _vbroadcast128 6*16(KEY), KEY13 + _vbroadcast128 7*16(KEY), KEY14 +.endif +.endm + +// Do a single round of AES encryption (if \enc==1) or decryption (if \enc==0) +// on the block(s) in \data using the round key(s) in \key. The register length +// determines the number of AES blocks en/decrypted. +.macro _vaes enc, last, key, data +.if \enc +.if \last + vaesenclast \key, \data, \data +.else + vaesenc \key, \data, \data +.endif +.else +.if \last + vaesdeclast \key, \data, \data +.else + vaesdec \key, \data, \data +.endif +.endif +.endm + +// Do a single round of AES en/decryption on the block(s) in \data, using the +// same key for all block(s). The round key is loaded from the appropriate +// register or memory location for round \i. May clobber V4. +.macro _vaes_1x enc, last, i, xmm_suffix, data +.if USE_AVX10 + _vaes \enc, \last, KEY\i\xmm_suffix, \data +.else +.ifnb \xmm_suffix + _vaes \enc, \last, (\i-7)*16(KEY), \data +.else + _vbroadcast128 (\i-7)*16(KEY), V4 + _vaes \enc, \last, V4, \data +.endif +.endif +.endm + +// Do a single round of AES en/decryption on the blocks in registers V0-V3, +// using the same key for all blocks. The round key is loaded from the +// appropriate register or memory location for round \i. In addition, does two +// steps of the computation of the next set of tweaks. May clobber V4. +.macro _vaes_4x enc, last, i +.if USE_AVX10 + _tweak_step (2*(\i-5)) + _vaes \enc, \last, KEY\i, V0 + _vaes \enc, \last, KEY\i, V1 + _tweak_step (2*(\i-5) + 1) + _vaes \enc, \last, KEY\i, V2 + _vaes \enc, \last, KEY\i, V3 +.else + _vbroadcast128 (\i-7)*16(KEY), V4 + _tweak_step (2*(\i-5)) + _vaes \enc, \last, V4, V0 + _vaes \enc, \last, V4, V1 + _tweak_step (2*(\i-5) + 1) + _vaes \enc, \last, V4, V2 + _vaes \enc, \last, V4, V3 +.endif +.endm + +// Do tweaked AES en/decryption (i.e., XOR with \tweak, then AES en/decrypt, +// then XOR with \tweak again) of the block(s) in \data. To process a single +// block, use xmm registers and set \xmm_suffix=_XMM. To process a vector of +// length VL, use V* registers and leave \xmm_suffix empty. May clobber V4. 
+.macro _aes_crypt enc, xmm_suffix, tweak, data + _xor3 KEY0\xmm_suffix, \tweak, \data + cmp $24, KEYLEN + jl .Laes128\@ + je .Laes192\@ + _vaes_1x \enc, 0, 1, \xmm_suffix, \data + _vaes_1x \enc, 0, 2, \xmm_suffix, \data +.Laes192\@: + _vaes_1x \enc, 0, 3, \xmm_suffix, \data + _vaes_1x \enc, 0, 4, \xmm_suffix, \data +.Laes128\@: + _vaes_1x \enc, 0, 5, \xmm_suffix, \data + _vaes_1x \enc, 0, 6, \xmm_suffix, \data + _vaes_1x \enc, 0, 7, \xmm_suffix, \data + _vaes_1x \enc, 0, 8, \xmm_suffix, \data + _vaes_1x \enc, 0, 9, \xmm_suffix, \data + _vaes_1x \enc, 0, 10, \xmm_suffix, \data + _vaes_1x \enc, 0, 11, \xmm_suffix, \data + _vaes_1x \enc, 0, 12, \xmm_suffix, \data + _vaes_1x \enc, 0, 13, \xmm_suffix, \data + _vaes_1x \enc, 1, 14, \xmm_suffix, \data + _vpxor \tweak, \data, \data +.endm + +.macro _aes_xts_crypt enc + _define_aliases + +.if !\enc + // When decrypting a message whose length isn't a multiple of the AES + // block length, exclude the last full block from the main loop by + // subtracting 16 from LEN. This is needed because ciphertext stealing + // decryption uses the last two tweaks in reverse order. We'll handle + // the last full block and the partial block specially at the end. + lea -16(LEN), %eax + test $15, LEN8 + cmovnz %eax, LEN +.endif + + // Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256). + movl 480(KEY), KEYLEN + + // Setup the pointer to the round keys and cache as many as possible. + _setup_round_keys \enc + + // Compute the first set of tweaks TWEAK[0-3]. + _compute_first_set_of_tweaks + + sub $4*VL, LEN + jl .Lhandle_remainder\@ + +.Lmain_loop\@: + // This is the main loop, en/decrypting 4*VL bytes per iteration. + + // XOR each source block with its tweak and the zero-th round key. +.if USE_AVX10 + vmovdqu8 0*VL(SRC), V0 + vmovdqu8 1*VL(SRC), V1 + vmovdqu8 2*VL(SRC), V2 + vmovdqu8 3*VL(SRC), V3 + vpternlogd $0x96, TWEAK0, KEY0, V0 + vpternlogd $0x96, TWEAK1, KEY0, V1 + vpternlogd $0x96, TWEAK2, KEY0, V2 + vpternlogd $0x96, TWEAK3, KEY0, V3 +.else + vpxor 0*VL(SRC), KEY0, V0 + vpxor 1*VL(SRC), KEY0, V1 + vpxor 2*VL(SRC), KEY0, V2 + vpxor 3*VL(SRC), KEY0, V3 + vpxor TWEAK0, V0, V0 + vpxor TWEAK1, V1, V1 + vpxor TWEAK2, V2, V2 + vpxor TWEAK3, V3, V3 +.endif + cmp $24, KEYLEN + jl .Laes128\@ + je .Laes192\@ + // Do all the AES rounds on the data blocks, interleaved with + // the computation of the next set of tweaks. + _vaes_4x \enc, 0, 1 + _vaes_4x \enc, 0, 2 +.Laes192\@: + _vaes_4x \enc, 0, 3 + _vaes_4x \enc, 0, 4 +.Laes128\@: + _vaes_4x \enc, 0, 5 + _vaes_4x \enc, 0, 6 + _vaes_4x \enc, 0, 7 + _vaes_4x \enc, 0, 8 + _vaes_4x \enc, 0, 9 + _vaes_4x \enc, 0, 10 + _vaes_4x \enc, 0, 11 + _vaes_4x \enc, 0, 12 + _vaes_4x \enc, 0, 13 + _vaes_4x \enc, 1, 14 + + // XOR in the tweaks again. + _vpxor TWEAK0, V0, V0 + _vpxor TWEAK1, V1, V1 + _vpxor TWEAK2, V2, V2 + _vpxor TWEAK3, V3, V3 + + // Store the destination blocks. + _vmovdqu V0, 0*VL(DST) + _vmovdqu V1, 1*VL(DST) + _vmovdqu V2, 2*VL(DST) + _vmovdqu V3, 3*VL(DST) + + // Finish computing the next set of tweaks. + _tweak_step 1000 + + add $4*VL, SRC + add $4*VL, DST + sub $4*VL, LEN + jge .Lmain_loop\@ + + // Check for the uncommon case where the data length isn't a multiple of + // 4*VL. Handle it out-of-line in order to optimize for the common + // case. In the common case, just fall through to the ret. + test $4*VL-1, LEN8 + jnz .Lhandle_remainder\@ +.Ldone\@: + // Store the next tweak back to *TWEAK to support continuation calls. 
+ vmovdqu TWEAK0_XMM, (TWEAK) +.if VL > 16 + vzeroupper +.endif + RET + +.Lhandle_remainder\@: + + // En/decrypt any remaining full blocks, one vector at a time. +.if VL > 16 + add $3*VL, LEN // Undo extra sub of 4*VL, then sub VL. + jl .Lvec_at_a_time_done\@ +.Lvec_at_a_time\@: + _vmovdqu (SRC), V0 + _aes_crypt \enc, , TWEAK0, V0 + _vmovdqu V0, (DST) + _next_tweakvec TWEAK0, V0, V1, TWEAK0 + add $VL, SRC + add $VL, DST + sub $VL, LEN + jge .Lvec_at_a_time\@ +.Lvec_at_a_time_done\@: + add $VL-16, LEN // Undo extra sub of VL, then sub 16. +.else + add $4*VL-16, LEN // Undo extra sub of 4*VL, then sub 16. +.endif + + // En/decrypt any remaining full blocks, one at a time. + jl .Lblock_at_a_time_done\@ +.Lblock_at_a_time\@: + vmovdqu (SRC), %xmm0 + _aes_crypt \enc, _XMM, TWEAK0_XMM, %xmm0 + vmovdqu %xmm0, (DST) + _next_tweak TWEAK0_XMM, %xmm0, TWEAK0_XMM + add $16, SRC + add $16, DST + sub $16, LEN + jge .Lblock_at_a_time\@ +.Lblock_at_a_time_done\@: + add $16, LEN // Undo the extra sub of 16. + // Now 0 <= LEN <= 15. If LEN is zero, we're done. + jz .Ldone\@ + + // Otherwise 1 <= LEN <= 15, but the real remaining length is 16 + LEN. + // Do ciphertext stealing to process the last 16 + LEN bytes. + +.if \enc + // If encrypting, the main loop already encrypted the last full block to + // create the CTS intermediate ciphertext. Prepare for the rest of CTS + // by rewinding the pointers and loading the intermediate ciphertext. + sub $16, SRC + sub $16, DST + vmovdqu (DST), %xmm0 +.else + // If decrypting, the main loop didn't decrypt the last full block + // because CTS decryption uses the last two tweaks in reverse order. + // Do it now by advancing the tweak and decrypting the last full block. + _next_tweak TWEAK0_XMM, %xmm0, TWEAK1_XMM + vmovdqu (SRC), %xmm0 + _aes_crypt \enc, _XMM, TWEAK1_XMM, %xmm0 +.endif + +.if USE_AVX10 + // Create a mask that has the first LEN bits set. + mov $-1, %r9d + bzhi LEN, %r9d, %r9d + kmovd %r9d, %k1 + + // Swap the first LEN bytes of the en/decryption of the last full block + // with the partial block. Note that to support in-place en/decryption, + // the load from the src partial block must happen before the store to + // the dst partial block. + vmovdqa %xmm0, %xmm1 + vmovdqu8 16(SRC), %xmm0{%k1} + vmovdqu8 %xmm1, 16(DST){%k1} +.else + lea .Lcts_permute_table(%rip), %r9 + + // Load the src partial block, left-aligned. Note that to support + // in-place en/decryption, this must happen before the store to the dst + // partial block. + vmovdqu (SRC, LEN64, 1), %xmm1 + + // Shift the first LEN bytes of the en/decryption of the last full block + // to the end of a register, then store it to DST+LEN. This stores the + // dst partial block. It also writes to the second part of the dst last + // full block, but that part is overwritten later. + vpshufb (%r9, LEN64, 1), %xmm0, %xmm2 + vmovdqu %xmm2, (DST, LEN64, 1) + + // Make xmm3 contain [16-LEN,16-LEN+1,...,14,15,0x80,0x80,...]. + sub LEN64, %r9 + vmovdqu 32(%r9), %xmm3 + + // Shift the src partial block to the beginning of its register. + vpshufb %xmm3, %xmm1, %xmm1 + + // Do a blend to generate the src partial block followed by the second + // part of the en/decryption of the last full block. + vpblendvb %xmm3, %xmm0, %xmm1, %xmm0 +.endif + // En/decrypt again and store the last full block. 
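+	// (Worked example, encrypting a 20-byte message: the last full block
+	// was already encrypted above with its tweak, giving the intermediate
+	// ciphertext CC.  The first 4 bytes of CC become the final 4-byte
+	// ciphertext block, and the block encrypted below is the 4 remaining
+	// plaintext bytes followed by the last 12 bytes of CC, using the next
+	// tweak -- standard XTS ciphertext stealing.)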
+ _aes_crypt \enc, _XMM, TWEAK0_XMM, %xmm0 + vmovdqu %xmm0, (DST) + jmp .Ldone\@ +.endm + +// void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, +// u8 iv[AES_BLOCK_SIZE]); +SYM_TYPED_FUNC_START(aes_xts_encrypt_iv) + vmovdqu (%rsi), %xmm0 + vpxor (%rdi), %xmm0, %xmm0 + movl 480(%rdi), %eax // AES key length + lea -16(%rdi, %rax, 4), %rdi + cmp $24, %eax + jl .Lencrypt_iv_aes128 + je .Lencrypt_iv_aes192 + vaesenc -6*16(%rdi), %xmm0, %xmm0 + vaesenc -5*16(%rdi), %xmm0, %xmm0 +.Lencrypt_iv_aes192: + vaesenc -4*16(%rdi), %xmm0, %xmm0 + vaesenc -3*16(%rdi), %xmm0, %xmm0 +.Lencrypt_iv_aes128: + vaesenc -2*16(%rdi), %xmm0, %xmm0 + vaesenc -1*16(%rdi), %xmm0, %xmm0 + vaesenc 0*16(%rdi), %xmm0, %xmm0 + vaesenc 1*16(%rdi), %xmm0, %xmm0 + vaesenc 2*16(%rdi), %xmm0, %xmm0 + vaesenc 3*16(%rdi), %xmm0, %xmm0 + vaesenc 4*16(%rdi), %xmm0, %xmm0 + vaesenc 5*16(%rdi), %xmm0, %xmm0 + vaesenc 6*16(%rdi), %xmm0, %xmm0 + vaesenclast 7*16(%rdi), %xmm0, %xmm0 + vmovdqu %xmm0, (%rsi) + RET +SYM_FUNC_END(aes_xts_encrypt_iv) + +// Below are the actual AES-XTS encryption and decryption functions, +// instantiated from the above macro. They all have the following prototype: +// +// void (*xts_asm_func)(const struct crypto_aes_ctx *key, +// const u8 *src, u8 *dst, unsigned int len, +// u8 tweak[AES_BLOCK_SIZE]); +// +// |key| is the data key. |tweak| contains the next tweak; the encryption of +// the original IV with the tweak key was already done. This function supports +// incremental computation, but |len| must always be >= 16 (AES_BLOCK_SIZE), and +// |len| must be a multiple of 16 except on the last call. If |len| is a +// multiple of 16, then this function updates |tweak| to contain the next tweak. + +.set VL, 16 +.set USE_AVX10, 0 +SYM_TYPED_FUNC_START(aes_xts_encrypt_aesni_avx) + _aes_xts_crypt 1 +SYM_FUNC_END(aes_xts_encrypt_aesni_avx) +SYM_TYPED_FUNC_START(aes_xts_decrypt_aesni_avx) + _aes_xts_crypt 0 +SYM_FUNC_END(aes_xts_decrypt_aesni_avx) + +#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) +.set VL, 32 +.set USE_AVX10, 0 +SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx2) + _aes_xts_crypt 1 +SYM_FUNC_END(aes_xts_encrypt_vaes_avx2) +SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx2) + _aes_xts_crypt 0 +SYM_FUNC_END(aes_xts_decrypt_vaes_avx2) + +.set VL, 32 +.set USE_AVX10, 1 +SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx10_256) + _aes_xts_crypt 1 +SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_256) +SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_256) + _aes_xts_crypt 0 +SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_256) + +.set VL, 64 +.set USE_AVX10, 1 +SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx10_512) + _aes_xts_crypt 1 +SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_512) +SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_512) + _aes_xts_crypt 0 +SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_512) +#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 411d8c83e88a..e1c0b7da59d3 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -2826,28 +2826,24 @@ SYM_FUNC_END(aesni_ctr_enc) .previous /* - * _aesni_gf128mul_x_ble: internal ABI - * Multiply in GF(2^128) for XTS IVs + * _aesni_gf128mul_x_ble: Multiply in GF(2^128) for XTS IVs * input: * IV: current IV * GF128MUL_MASK == mask with 0x87 and 0x01 * output: * IV: next IV * changed: - * CTR: == temporary value + * KEY: == temporary value */ -#define _aesni_gf128mul_x_ble() \ - pshufd $0x13, IV, KEY; \ - paddq IV, IV; \ - psrad $31, KEY; \ - 
pand GF128MUL_MASK, KEY; \ - pxor KEY, IV; +.macro _aesni_gf128mul_x_ble + pshufd $0x13, IV, KEY + paddq IV, IV + psrad $31, KEY + pand GF128MUL_MASK, KEY + pxor KEY, IV +.endm -/* - * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst, - * const u8 *src, unsigned int len, le128 *iv) - */ -SYM_FUNC_START(aesni_xts_encrypt) +.macro _aesni_xts_crypt enc FRAME_BEGIN #ifndef __x86_64__ pushl IVP @@ -2866,35 +2862,46 @@ SYM_FUNC_START(aesni_xts_encrypt) movups (IVP), IV mov 480(KEYP), KLEN +.if !\enc + add $240, KEYP -.Lxts_enc_loop4: + test $15, LEN + jz .Lxts_loop4\@ + sub $16, LEN +.endif + +.Lxts_loop4\@: sub $64, LEN - jl .Lxts_enc_1x + jl .Lxts_1x\@ movdqa IV, STATE1 movdqu 0x00(INP), IN pxor IN, STATE1 movdqu IV, 0x00(OUTP) - _aesni_gf128mul_x_ble() + _aesni_gf128mul_x_ble movdqa IV, STATE2 movdqu 0x10(INP), IN pxor IN, STATE2 movdqu IV, 0x10(OUTP) - _aesni_gf128mul_x_ble() + _aesni_gf128mul_x_ble movdqa IV, STATE3 movdqu 0x20(INP), IN pxor IN, STATE3 movdqu IV, 0x20(OUTP) - _aesni_gf128mul_x_ble() + _aesni_gf128mul_x_ble movdqa IV, STATE4 movdqu 0x30(INP), IN pxor IN, STATE4 movdqu IV, 0x30(OUTP) +.if \enc call _aesni_enc4 +.else + call _aesni_dec4 +.endif movdqu 0x00(OUTP), IN pxor IN, STATE1 @@ -2912,17 +2919,17 @@ SYM_FUNC_START(aesni_xts_encrypt) pxor IN, STATE4 movdqu STATE4, 0x30(OUTP) - _aesni_gf128mul_x_ble() + _aesni_gf128mul_x_ble add $64, INP add $64, OUTP test LEN, LEN - jnz .Lxts_enc_loop4 + jnz .Lxts_loop4\@ -.Lxts_enc_ret_iv: +.Lxts_ret_iv\@: movups IV, (IVP) -.Lxts_enc_ret: +.Lxts_ret\@: #ifndef __x86_64__ popl KLEN popl KEYP @@ -2932,201 +2939,60 @@ SYM_FUNC_START(aesni_xts_encrypt) FRAME_END RET -.Lxts_enc_1x: +.Lxts_1x\@: add $64, LEN - jz .Lxts_enc_ret_iv + jz .Lxts_ret_iv\@ +.if \enc sub $16, LEN - jl .Lxts_enc_cts4 + jl .Lxts_cts4\@ +.endif -.Lxts_enc_loop1: +.Lxts_loop1\@: movdqu (INP), STATE +.if \enc pxor IV, STATE call _aesni_enc1 - pxor IV, STATE - _aesni_gf128mul_x_ble() - - test LEN, LEN - jz .Lxts_enc_out - +.else add $16, INP sub $16, LEN - jl .Lxts_enc_cts1 - - movdqu STATE, (OUTP) - add $16, OUTP - jmp .Lxts_enc_loop1 - -.Lxts_enc_out: - movdqu STATE, (OUTP) - jmp .Lxts_enc_ret_iv - -.Lxts_enc_cts4: - movdqa STATE4, STATE - sub $16, OUTP - -.Lxts_enc_cts1: -#ifndef __x86_64__ - lea .Lcts_permute_table, T1 -#else - lea .Lcts_permute_table(%rip), T1 -#endif - add LEN, INP /* rewind input pointer */ - add $16, LEN /* # bytes in final block */ - movups (INP), IN1 - - mov T1, IVP - add $32, IVP - add LEN, T1 - sub LEN, IVP - add OUTP, LEN - - movups (T1), %xmm4 - movaps STATE, IN2 - pshufb %xmm4, STATE - movups STATE, (LEN) - - movups (IVP), %xmm0 - pshufb %xmm0, IN1 - pblendvb IN2, IN1 - movaps IN1, STATE - + jl .Lxts_cts1\@ pxor IV, STATE - call _aesni_enc1 + call _aesni_dec1 +.endif pxor IV, STATE + _aesni_gf128mul_x_ble - movups STATE, (OUTP) - jmp .Lxts_enc_ret -SYM_FUNC_END(aesni_xts_encrypt) - -/* - * void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst, - * const u8 *src, unsigned int len, le128 *iv) - */ -SYM_FUNC_START(aesni_xts_decrypt) - FRAME_BEGIN -#ifndef __x86_64__ - pushl IVP - pushl LEN - pushl KEYP - pushl KLEN - movl (FRAME_OFFSET+20)(%esp), KEYP # ctx - movl (FRAME_OFFSET+24)(%esp), OUTP # dst - movl (FRAME_OFFSET+28)(%esp), INP # src - movl (FRAME_OFFSET+32)(%esp), LEN # len - movl (FRAME_OFFSET+36)(%esp), IVP # iv - movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK -#else - movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK -#endif - movups (IVP), IV - - mov 480(KEYP), KLEN - add $240, KEYP - - test $15, LEN - jz 
.Lxts_dec_loop4 - sub $16, LEN - -.Lxts_dec_loop4: - sub $64, LEN - jl .Lxts_dec_1x - - movdqa IV, STATE1 - movdqu 0x00(INP), IN - pxor IN, STATE1 - movdqu IV, 0x00(OUTP) - - _aesni_gf128mul_x_ble() - movdqa IV, STATE2 - movdqu 0x10(INP), IN - pxor IN, STATE2 - movdqu IV, 0x10(OUTP) - - _aesni_gf128mul_x_ble() - movdqa IV, STATE3 - movdqu 0x20(INP), IN - pxor IN, STATE3 - movdqu IV, 0x20(OUTP) - - _aesni_gf128mul_x_ble() - movdqa IV, STATE4 - movdqu 0x30(INP), IN - pxor IN, STATE4 - movdqu IV, 0x30(OUTP) - - call _aesni_dec4 - - movdqu 0x00(OUTP), IN - pxor IN, STATE1 - movdqu STATE1, 0x00(OUTP) - - movdqu 0x10(OUTP), IN - pxor IN, STATE2 - movdqu STATE2, 0x10(OUTP) - - movdqu 0x20(OUTP), IN - pxor IN, STATE3 - movdqu STATE3, 0x20(OUTP) - - movdqu 0x30(OUTP), IN - pxor IN, STATE4 - movdqu STATE4, 0x30(OUTP) - - _aesni_gf128mul_x_ble() - - add $64, INP - add $64, OUTP test LEN, LEN - jnz .Lxts_dec_loop4 - -.Lxts_dec_ret_iv: - movups IV, (IVP) - -.Lxts_dec_ret: -#ifndef __x86_64__ - popl KLEN - popl KEYP - popl LEN - popl IVP -#endif - FRAME_END - RET - -.Lxts_dec_1x: - add $64, LEN - jz .Lxts_dec_ret_iv - -.Lxts_dec_loop1: - movdqu (INP), STATE + jz .Lxts_out\@ +.if \enc add $16, INP sub $16, LEN - jl .Lxts_dec_cts1 - - pxor IV, STATE - call _aesni_dec1 - pxor IV, STATE - _aesni_gf128mul_x_ble() - - test LEN, LEN - jz .Lxts_dec_out + jl .Lxts_cts1\@ +.endif movdqu STATE, (OUTP) add $16, OUTP - jmp .Lxts_dec_loop1 + jmp .Lxts_loop1\@ -.Lxts_dec_out: +.Lxts_out\@: movdqu STATE, (OUTP) - jmp .Lxts_dec_ret_iv + jmp .Lxts_ret_iv\@ -.Lxts_dec_cts1: +.if \enc +.Lxts_cts4\@: + movdqa STATE4, STATE + sub $16, OUTP +.Lxts_cts1\@: +.else +.Lxts_cts1\@: movdqa IV, STATE4 - _aesni_gf128mul_x_ble() + _aesni_gf128mul_x_ble pxor IV, STATE call _aesni_dec1 pxor IV, STATE - +.endif #ifndef __x86_64__ lea .Lcts_permute_table, T1 #else @@ -3152,10 +3018,32 @@ SYM_FUNC_START(aesni_xts_decrypt) pblendvb IN2, IN1 movaps IN1, STATE +.if \enc + pxor IV, STATE + call _aesni_enc1 + pxor IV, STATE +.else pxor STATE4, STATE call _aesni_dec1 pxor STATE4, STATE +.endif movups STATE, (OUTP) - jmp .Lxts_dec_ret -SYM_FUNC_END(aesni_xts_decrypt) + jmp .Lxts_ret\@ +.endm + +/* + * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst, + * const u8 *src, unsigned int len, le128 *iv) + */ +SYM_FUNC_START(aesni_xts_enc) + _aesni_xts_crypt 1 +SYM_FUNC_END(aesni_xts_enc) + +/* + * void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *dst, + * const u8 *src, unsigned int len, le128 *iv) + */ +SYM_FUNC_START(aesni_xts_dec) + _aesni_xts_crypt 0 +SYM_FUNC_END(aesni_xts_dec) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index b1d90c25975a..213fed27086d 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -107,11 +107,11 @@ asmlinkage void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, #define AVX_GEN2_OPTSIZE 640 #define AVX_GEN4_OPTSIZE 4096 -asmlinkage void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, unsigned int len, u8 *iv); +asmlinkage void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); -asmlinkage void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, unsigned int len, u8 *iv); +asmlinkage void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); #ifdef CONFIG_X86_64 @@ -877,7 +877,7 @@ static int helper_rfc4106_decrypt(struct aead_request *req) } #endif -static int 
xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key, +static int xts_setkey_aesni(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); @@ -898,108 +898,152 @@ static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key, return aes_set_key_common(&ctx->tweak_ctx, key + keylen, keylen); } -static int xts_crypt(struct skcipher_request *req, bool encrypt) +typedef void (*xts_encrypt_iv_func)(const struct crypto_aes_ctx *tweak_key, + u8 iv[AES_BLOCK_SIZE]); +typedef void (*xts_crypt_func)(const struct crypto_aes_ctx *key, + const u8 *src, u8 *dst, unsigned int len, + u8 tweak[AES_BLOCK_SIZE]); + +/* This handles cases where the source and/or destination span pages. */ +static noinline int +xts_crypt_slowpath(struct skcipher_request *req, xts_crypt_func crypt_func) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); + const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); int tail = req->cryptlen % AES_BLOCK_SIZE; + struct scatterlist sg_src[2], sg_dst[2]; struct skcipher_request subreq; struct skcipher_walk walk; + struct scatterlist *src, *dst; int err; - if (req->cryptlen < AES_BLOCK_SIZE) - return -EINVAL; - - err = skcipher_walk_virt(&walk, req, false); - if (!walk.nbytes) - return err; - - if (unlikely(tail > 0 && walk.nbytes < walk.total)) { - int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; - - skcipher_walk_abort(&walk); - + /* + * If the message length isn't divisible by the AES block size, then + * separate off the last full block and the partial block. This ensures + * that they are processed in the same call to the assembly function, + * which is required for ciphertext stealing. + */ + if (tail) { skcipher_request_set_tfm(&subreq, tfm); skcipher_request_set_callback(&subreq, skcipher_request_flags(req), NULL, NULL); skcipher_request_set_crypt(&subreq, req->src, req->dst, - blocks * AES_BLOCK_SIZE, req->iv); + req->cryptlen - tail - AES_BLOCK_SIZE, + req->iv); req = &subreq; - - err = skcipher_walk_virt(&walk, req, false); - if (!walk.nbytes) - return err; - } else { - tail = 0; } - kernel_fpu_begin(); - - /* calculate first value of T */ - aesni_enc(&ctx->tweak_ctx, walk.iv, walk.iv); + err = skcipher_walk_virt(&walk, req, false); - while (walk.nbytes > 0) { - int nbytes = walk.nbytes; + while (walk.nbytes) { + unsigned int nbytes = walk.nbytes; if (nbytes < walk.total) - nbytes &= ~(AES_BLOCK_SIZE - 1); - - if (encrypt) - aesni_xts_encrypt(&ctx->crypt_ctx, - walk.dst.virt.addr, walk.src.virt.addr, - nbytes, walk.iv); - else - aesni_xts_decrypt(&ctx->crypt_ctx, - walk.dst.virt.addr, walk.src.virt.addr, - nbytes, walk.iv); - kernel_fpu_end(); + nbytes = round_down(nbytes, AES_BLOCK_SIZE); + kernel_fpu_begin(); + (*crypt_func)(&ctx->crypt_ctx, walk.src.virt.addr, + walk.dst.virt.addr, nbytes, req->iv); + kernel_fpu_end(); err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - - if (walk.nbytes > 0) - kernel_fpu_begin(); } - if (unlikely(tail > 0 && !err)) { - struct scatterlist sg_src[2], sg_dst[2]; - struct scatterlist *src, *dst; + if (err || !tail) + return err; - dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); - if (req->dst != req->src) - dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + /* Do ciphertext stealing with the last full block and partial block. 
*/ - skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, - req->iv); + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); - err = skcipher_walk_virt(&walk, &subreq, false); - if (err) - return err; + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); - kernel_fpu_begin(); - if (encrypt) - aesni_xts_encrypt(&ctx->crypt_ctx, - walk.dst.virt.addr, walk.src.virt.addr, - walk.nbytes, walk.iv); - else - aesni_xts_decrypt(&ctx->crypt_ctx, - walk.dst.virt.addr, walk.src.virt.addr, - walk.nbytes, walk.iv); - kernel_fpu_end(); + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; - err = skcipher_walk_done(&walk, 0); + kernel_fpu_begin(); + (*crypt_func)(&ctx->crypt_ctx, walk.src.virt.addr, walk.dst.virt.addr, + walk.nbytes, req->iv); + kernel_fpu_end(); + + return skcipher_walk_done(&walk, 0); +} + +/* __always_inline to avoid indirect call in fastpath */ +static __always_inline int +xts_crypt(struct skcipher_request *req, xts_encrypt_iv_func encrypt_iv, + xts_crypt_func crypt_func) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); + const unsigned int cryptlen = req->cryptlen; + struct scatterlist *src = req->src; + struct scatterlist *dst = req->dst; + + if (unlikely(cryptlen < AES_BLOCK_SIZE)) + return -EINVAL; + + kernel_fpu_begin(); + (*encrypt_iv)(&ctx->tweak_ctx, req->iv); + + /* + * In practice, virtually all XTS plaintexts and ciphertexts are either + * 512 or 4096 bytes, aligned such that they don't span page boundaries. + * To optimize the performance of these cases, and also any other case + * where no page boundary is spanned, the below fast-path handles + * single-page sources and destinations as efficiently as possible. 
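+	 * For example, a 512-byte sector that begins at offset 0 of a page
+	 * passes all four checks, so it is mapped with kmap_local_page() and
+	 * handed to the assembly routine in a single call; requests that do
+	 * not satisfy the checks (e.g. data spanning a page boundary) fall
+	 * back to xts_crypt_slowpath() below.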
+ */ + if (likely(src->length >= cryptlen && dst->length >= cryptlen && + src->offset + cryptlen <= PAGE_SIZE && + dst->offset + cryptlen <= PAGE_SIZE)) { + struct page *src_page = sg_page(src); + struct page *dst_page = sg_page(dst); + void *src_virt = kmap_local_page(src_page) + src->offset; + void *dst_virt = kmap_local_page(dst_page) + dst->offset; + + (*crypt_func)(&ctx->crypt_ctx, src_virt, dst_virt, cryptlen, + req->iv); + kunmap_local(dst_virt); + kunmap_local(src_virt); + kernel_fpu_end(); + return 0; } - return err; + kernel_fpu_end(); + return xts_crypt_slowpath(req, crypt_func); +} + +static void aesni_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, + u8 iv[AES_BLOCK_SIZE]) +{ + aesni_enc(tweak_key, iv, iv); } -static int xts_encrypt(struct skcipher_request *req) +static void aesni_xts_encrypt(const struct crypto_aes_ctx *key, + const u8 *src, u8 *dst, unsigned int len, + u8 tweak[AES_BLOCK_SIZE]) { - return xts_crypt(req, true); + aesni_xts_enc(key, dst, src, len, tweak); } -static int xts_decrypt(struct skcipher_request *req) +static void aesni_xts_decrypt(const struct crypto_aes_ctx *key, + const u8 *src, u8 *dst, unsigned int len, + u8 tweak[AES_BLOCK_SIZE]) { - return xts_crypt(req, false); + aesni_xts_dec(key, dst, src, len, tweak); +} + +static int xts_encrypt_aesni(struct skcipher_request *req) +{ + return xts_crypt(req, aesni_xts_encrypt_iv, aesni_xts_encrypt); +} + +static int xts_decrypt_aesni(struct skcipher_request *req) +{ + return xts_crypt(req, aesni_xts_encrypt_iv, aesni_xts_decrypt); } static struct crypto_alg aesni_cipher_alg = { @@ -1103,9 +1147,9 @@ static struct skcipher_alg aesni_skciphers[] = { .max_keysize = 2 * AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, .walksize = 2 * AES_BLOCK_SIZE, - .setkey = xts_aesni_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, + .setkey = xts_setkey_aesni, + .encrypt = xts_encrypt_aesni, + .decrypt = xts_decrypt_aesni, } }; @@ -1137,7 +1181,149 @@ static struct skcipher_alg aesni_xctr = { }; static struct simd_skcipher_alg *aesni_simd_xctr; -#endif /* CONFIG_X86_64 */ + +asmlinkage void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, + u8 iv[AES_BLOCK_SIZE]); + +#define DEFINE_XTS_ALG(suffix, driver_name, priority) \ + \ +asmlinkage void \ +aes_xts_encrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src, \ + u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]); \ +asmlinkage void \ +aes_xts_decrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src, \ + u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]); \ + \ +static int xts_encrypt_##suffix(struct skcipher_request *req) \ +{ \ + return xts_crypt(req, aes_xts_encrypt_iv, aes_xts_encrypt_##suffix); \ +} \ + \ +static int xts_decrypt_##suffix(struct skcipher_request *req) \ +{ \ + return xts_crypt(req, aes_xts_encrypt_iv, aes_xts_decrypt_##suffix); \ +} \ + \ +static struct skcipher_alg aes_xts_alg_##suffix = { \ + .base = { \ + .cra_name = "__xts(aes)", \ + .cra_driver_name = "__" driver_name, \ + .cra_priority = priority, \ + .cra_flags = CRYPTO_ALG_INTERNAL, \ + .cra_blocksize = AES_BLOCK_SIZE, \ + .cra_ctxsize = XTS_AES_CTX_SIZE, \ + .cra_module = THIS_MODULE, \ + }, \ + .min_keysize = 2 * AES_MIN_KEY_SIZE, \ + .max_keysize = 2 * AES_MAX_KEY_SIZE, \ + .ivsize = AES_BLOCK_SIZE, \ + .walksize = 2 * AES_BLOCK_SIZE, \ + .setkey = xts_setkey_aesni, \ + .encrypt = xts_encrypt_##suffix, \ + .decrypt = xts_decrypt_##suffix, \ +}; \ + \ +static struct simd_skcipher_alg *aes_xts_simdalg_##suffix + +DEFINE_XTS_ALG(aesni_avx, 
"xts-aes-aesni-avx", 500); +#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) +DEFINE_XTS_ALG(vaes_avx2, "xts-aes-vaes-avx2", 600); +DEFINE_XTS_ALG(vaes_avx10_256, "xts-aes-vaes-avx10_256", 700); +DEFINE_XTS_ALG(vaes_avx10_512, "xts-aes-vaes-avx10_512", 800); +#endif + +/* + * This is a list of CPU models that are known to suffer from downclocking when + * zmm registers (512-bit vectors) are used. On these CPUs, the AES-XTS + * implementation with zmm registers won't be used by default. An + * implementation with ymm registers (256-bit vectors) will be used instead. + */ +static const struct x86_cpu_id zmm_exclusion_list[] = { + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_SKYLAKE_X }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_X }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_D }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_L }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_NNPI }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_TIGERLAKE_L }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_TIGERLAKE }, + /* Allow Rocket Lake and later, and Sapphire Rapids and later. */ + /* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */ + {}, +}; + +static int __init register_xts_algs(void) +{ + int err; + + if (!boot_cpu_has(X86_FEATURE_AVX)) + return 0; + err = simd_register_skciphers_compat(&aes_xts_alg_aesni_avx, 1, + &aes_xts_simdalg_aesni_avx); + if (err) + return err; +#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) + if (!boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_VAES) || + !boot_cpu_has(X86_FEATURE_VPCLMULQDQ) || + !boot_cpu_has(X86_FEATURE_PCLMULQDQ) || + !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) + return 0; + err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx2, 1, + &aes_xts_simdalg_vaes_avx2); + if (err) + return err; + + if (!boot_cpu_has(X86_FEATURE_AVX512BW) || + !boot_cpu_has(X86_FEATURE_AVX512VL) || + !boot_cpu_has(X86_FEATURE_BMI2) || + !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | + XFEATURE_MASK_AVX512, NULL)) + return 0; + + err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_256, 1, + &aes_xts_simdalg_vaes_avx10_256); + if (err) + return err; + + if (x86_match_cpu(zmm_exclusion_list)) + aes_xts_alg_vaes_avx10_512.base.cra_priority = 1; + + err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_512, 1, + &aes_xts_simdalg_vaes_avx10_512); + if (err) + return err; +#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ + return 0; +} + +static void unregister_xts_algs(void) +{ + if (aes_xts_simdalg_aesni_avx) + simd_unregister_skciphers(&aes_xts_alg_aesni_avx, 1, + &aes_xts_simdalg_aesni_avx); +#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) + if (aes_xts_simdalg_vaes_avx2) + simd_unregister_skciphers(&aes_xts_alg_vaes_avx2, 1, + &aes_xts_simdalg_vaes_avx2); + if (aes_xts_simdalg_vaes_avx10_256) + simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_256, 1, + &aes_xts_simdalg_vaes_avx10_256); + if (aes_xts_simdalg_vaes_avx10_512) + simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_512, 1, + &aes_xts_simdalg_vaes_avx10_512); +#endif +} +#else /* CONFIG_X86_64 */ +static int __init register_xts_algs(void) +{ + return 0; +} + +static void unregister_xts_algs(void) +{ +} +#endif /* !CONFIG_X86_64 */ #ifdef CONFIG_X86_64 
static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key, @@ -1276,13 +1462,21 @@ static int __init aesni_init(void) goto unregister_aeads; #endif /* CONFIG_X86_64 */ + err = register_xts_algs(); + if (err) + goto unregister_xts; + return 0; +unregister_xts: + unregister_xts_algs(); #ifdef CONFIG_X86_64 + if (aesni_simd_xctr) + simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr); unregister_aeads: +#endif /* CONFIG_X86_64 */ simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads), aesni_simd_aeads); -#endif /* CONFIG_X86_64 */ unregister_skciphers: simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers), @@ -1303,6 +1497,7 @@ static void __exit aesni_exit(void) if (boot_cpu_has(X86_FEATURE_AVX)) simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr); #endif /* CONFIG_X86_64 */ + unregister_xts_algs(); } late_initcall(aesni_init); diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S index 537b6dcd7ed8..d515a55a3bc1 100644 --- a/arch/x86/crypto/sha256_ni_asm.S +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -62,20 +62,41 @@ #define SHA256CONSTANTS %rax -#define MSG %xmm0 +#define MSG %xmm0 /* sha256rnds2 implicit operand */ #define STATE0 %xmm1 #define STATE1 %xmm2 -#define MSGTMP0 %xmm3 -#define MSGTMP1 %xmm4 -#define MSGTMP2 %xmm5 -#define MSGTMP3 %xmm6 -#define MSGTMP4 %xmm7 +#define MSG0 %xmm3 +#define MSG1 %xmm4 +#define MSG2 %xmm5 +#define MSG3 %xmm6 +#define TMP %xmm7 #define SHUF_MASK %xmm8 #define ABEF_SAVE %xmm9 #define CDGH_SAVE %xmm10 +.macro do_4rounds i, m0, m1, m2, m3 +.if \i < 16 + movdqu \i*4(DATA_PTR), \m0 + pshufb SHUF_MASK, \m0 +.endif + movdqa (\i-32)*4(SHA256CONSTANTS), MSG + paddd \m0, MSG + sha256rnds2 STATE0, STATE1 +.if \i >= 12 && \i < 60 + movdqa \m0, TMP + palignr $4, \m3, TMP + paddd TMP, \m1 + sha256msg2 \m0, \m1 +.endif + punpckhqdq MSG, MSG + sha256rnds2 STATE1, STATE0 +.if \i >= 4 && \i < 52 + sha256msg1 \m0, \m3 +.endif +.endm + /* * Intel SHA Extensions optimized implementation of a SHA-256 update function * @@ -86,9 +107,6 @@ * store partial blocks. All message padding and hash value initialization must * be done outside the update function. * - * The indented lines in the loop are instructions related to rounds processing. - * The non-indented lines are instructions related to the message schedule. 
- * * void sha256_ni_transform(uint32_t *digest, const void *data, uint32_t numBlocks); * digest : pointer to digest @@ -108,202 +126,29 @@ SYM_TYPED_FUNC_START(sha256_ni_transform) * Need to reorder these appropriately * DCBA, HGFE -> ABEF, CDGH */ - movdqu 0*16(DIGEST_PTR), STATE0 - movdqu 1*16(DIGEST_PTR), STATE1 + movdqu 0*16(DIGEST_PTR), STATE0 /* DCBA */ + movdqu 1*16(DIGEST_PTR), STATE1 /* HGFE */ - pshufd $0xB1, STATE0, STATE0 /* CDAB */ - pshufd $0x1B, STATE1, STATE1 /* EFGH */ - movdqa STATE0, MSGTMP4 - palignr $8, STATE1, STATE0 /* ABEF */ - pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */ + movdqa STATE0, TMP + punpcklqdq STATE1, STATE0 /* FEBA */ + punpckhqdq TMP, STATE1 /* DCHG */ + pshufd $0x1B, STATE0, STATE0 /* ABEF */ + pshufd $0xB1, STATE1, STATE1 /* CDGH */ movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK - lea K256(%rip), SHA256CONSTANTS + lea K256+32*4(%rip), SHA256CONSTANTS .Lloop0: /* Save hash values for addition after rounds */ movdqa STATE0, ABEF_SAVE movdqa STATE1, CDGH_SAVE - /* Rounds 0-3 */ - movdqu 0*16(DATA_PTR), MSG - pshufb SHUF_MASK, MSG - movdqa MSG, MSGTMP0 - paddd 0*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - - /* Rounds 4-7 */ - movdqu 1*16(DATA_PTR), MSG - pshufb SHUF_MASK, MSG - movdqa MSG, MSGTMP1 - paddd 1*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP1, MSGTMP0 - - /* Rounds 8-11 */ - movdqu 2*16(DATA_PTR), MSG - pshufb SHUF_MASK, MSG - movdqa MSG, MSGTMP2 - paddd 2*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP2, MSGTMP1 - - /* Rounds 12-15 */ - movdqu 3*16(DATA_PTR), MSG - pshufb SHUF_MASK, MSG - movdqa MSG, MSGTMP3 - paddd 3*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP3, MSGTMP4 - palignr $4, MSGTMP2, MSGTMP4 - paddd MSGTMP4, MSGTMP0 - sha256msg2 MSGTMP3, MSGTMP0 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP3, MSGTMP2 - - /* Rounds 16-19 */ - movdqa MSGTMP0, MSG - paddd 4*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP0, MSGTMP4 - palignr $4, MSGTMP3, MSGTMP4 - paddd MSGTMP4, MSGTMP1 - sha256msg2 MSGTMP0, MSGTMP1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP0, MSGTMP3 - - /* Rounds 20-23 */ - movdqa MSGTMP1, MSG - paddd 5*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP1, MSGTMP4 - palignr $4, MSGTMP0, MSGTMP4 - paddd MSGTMP4, MSGTMP2 - sha256msg2 MSGTMP1, MSGTMP2 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP1, MSGTMP0 - - /* Rounds 24-27 */ - movdqa MSGTMP2, MSG - paddd 6*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP2, MSGTMP4 - palignr $4, MSGTMP1, MSGTMP4 - paddd MSGTMP4, MSGTMP3 - sha256msg2 MSGTMP2, MSGTMP3 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP2, MSGTMP1 - - /* Rounds 28-31 */ - movdqa MSGTMP3, MSG - paddd 7*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP3, MSGTMP4 - palignr $4, MSGTMP2, MSGTMP4 - paddd MSGTMP4, MSGTMP0 - sha256msg2 MSGTMP3, MSGTMP0 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP3, MSGTMP2 - - /* Rounds 32-35 */ - movdqa MSGTMP0, MSG - paddd 8*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP0, MSGTMP4 - palignr $4, MSGTMP3, MSGTMP4 - paddd MSGTMP4, MSGTMP1 - sha256msg2 MSGTMP0, MSGTMP1 - pshufd $0x0E, MSG, MSG - sha256rnds2 
STATE1, STATE0 - sha256msg1 MSGTMP0, MSGTMP3 - - /* Rounds 36-39 */ - movdqa MSGTMP1, MSG - paddd 9*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP1, MSGTMP4 - palignr $4, MSGTMP0, MSGTMP4 - paddd MSGTMP4, MSGTMP2 - sha256msg2 MSGTMP1, MSGTMP2 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP1, MSGTMP0 - - /* Rounds 40-43 */ - movdqa MSGTMP2, MSG - paddd 10*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP2, MSGTMP4 - palignr $4, MSGTMP1, MSGTMP4 - paddd MSGTMP4, MSGTMP3 - sha256msg2 MSGTMP2, MSGTMP3 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP2, MSGTMP1 - - /* Rounds 44-47 */ - movdqa MSGTMP3, MSG - paddd 11*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP3, MSGTMP4 - palignr $4, MSGTMP2, MSGTMP4 - paddd MSGTMP4, MSGTMP0 - sha256msg2 MSGTMP3, MSGTMP0 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP3, MSGTMP2 - - /* Rounds 48-51 */ - movdqa MSGTMP0, MSG - paddd 12*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP0, MSGTMP4 - palignr $4, MSGTMP3, MSGTMP4 - paddd MSGTMP4, MSGTMP1 - sha256msg2 MSGTMP0, MSGTMP1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP0, MSGTMP3 - - /* Rounds 52-55 */ - movdqa MSGTMP1, MSG - paddd 13*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP1, MSGTMP4 - palignr $4, MSGTMP0, MSGTMP4 - paddd MSGTMP4, MSGTMP2 - sha256msg2 MSGTMP1, MSGTMP2 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - - /* Rounds 56-59 */ - movdqa MSGTMP2, MSG - paddd 14*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP2, MSGTMP4 - palignr $4, MSGTMP1, MSGTMP4 - paddd MSGTMP4, MSGTMP3 - sha256msg2 MSGTMP2, MSGTMP3 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - - /* Rounds 60-63 */ - movdqa MSGTMP3, MSG - paddd 15*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 +.irp i, 0, 16, 32, 48 + do_4rounds (\i + 0), MSG0, MSG1, MSG2, MSG3 + do_4rounds (\i + 4), MSG1, MSG2, MSG3, MSG0 + do_4rounds (\i + 8), MSG2, MSG3, MSG0, MSG1 + do_4rounds (\i + 12), MSG3, MSG0, MSG1, MSG2 +.endr /* Add current hash values with previously saved */ paddd ABEF_SAVE, STATE0 @@ -315,14 +160,14 @@ SYM_TYPED_FUNC_START(sha256_ni_transform) jne .Lloop0 /* Write hash values back in the correct order */ - pshufd $0x1B, STATE0, STATE0 /* FEBA */ - pshufd $0xB1, STATE1, STATE1 /* DCHG */ - movdqa STATE0, MSGTMP4 - pblendw $0xF0, STATE1, STATE0 /* DCBA */ - palignr $8, MSGTMP4, STATE1 /* HGFE */ - - movdqu STATE0, 0*16(DIGEST_PTR) - movdqu STATE1, 1*16(DIGEST_PTR) + movdqa STATE0, TMP + punpcklqdq STATE1, STATE0 /* GHEF */ + punpckhqdq TMP, STATE1 /* ABCD */ + pshufd $0xB1, STATE0, STATE0 /* HGFE */ + pshufd $0x1B, STATE1, STATE1 /* DCBA */ + + movdqu STATE1, 0*16(DIGEST_PTR) + movdqu STATE0, 1*16(DIGEST_PTR) .Ldone_hash: -- 2.45.0 From 1e9165d44c07082e7633bfeea9b10c792e1c6872 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 2 May 2024 18:28:47 +0200 Subject: [PATCH 2/9] amd-pstate Signed-off-by: Peter Jung --- .../admin-guide/kernel-parameters.txt | 5 + Documentation/admin-guide/pm/amd-pstate.rst | 70 ++- arch/x86/Kconfig | 5 +- arch/x86/include/asm/msr-index.h | 2 + drivers/acpi/cppc_acpi.c | 17 +- drivers/acpi/processor_driver.c | 6 + drivers/cpufreq/acpi-cpufreq.c | 2 - drivers/cpufreq/amd-pstate-ut.c | 2 +- drivers/cpufreq/amd-pstate.c | 586 ++++++++++++++---- include/acpi/cppc_acpi.h | 5 + include/linux/amd-pstate.h | 43 +- 
 include/linux/cpufreq.h                       |   1 +
 12 files changed, 620 insertions(+), 124 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 31fdaf4fe9dd..dd62df26186f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -374,6 +374,11 @@
 			selects a performance level in this range and appropriate
 			to the current workload.
 
+	amd_prefcore=
+			[X86]
+			disable
+			  Disable amd-pstate preferred core.
+
 	amijoy.map=	[HW,JOY] Amiga joystick support
 			Map of devices attached to JOY0DAT and JOY1DAT
 			Format: <a>,<b>
diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
index 9eb26014d34b..82fbd01da658 100644
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -300,8 +300,8 @@ platforms. The AMD P-States mechanism is the more performance and energy
 efficiency frequency management method on AMD processors.
 
-AMD Pstate Driver Operation Modes
-=================================
+``amd-pstate`` Driver Operation Modes
+======================================
 
 ``amd_pstate`` CPPC has 3 operation modes: autonomous (active) mode,
 non-autonomous (passive) mode and guided autonomous (guided) mode.
@@ -353,6 +353,48 @@ is activated. In this mode, driver requests minimum and maximum performance
 level and the platform autonomously selects a performance level in this range
 and appropriate to the current workload.
 
+``amd-pstate`` Preferred Core
+=================================
+
+The core frequency is subject to process variation in semiconductors.
+Not all cores are able to reach the maximum frequency within the
+infrastructure limits. Consequently, AMD has redefined the concept of
+the maximum frequency of a part: only a fraction of the cores can
+reach it. To find the best process scheduling policy for a given
+scenario, the OS needs to know the core ordering, which the platform
+reports through the highest performance capability register of the
+CPPC interface.
+
+``amd-pstate`` preferred core enables the scheduler to prefer scheduling on
+cores that can achieve a higher frequency with lower voltage. The preferred
+core rankings can dynamically change based on the workload, platform conditions,
+thermals and ageing.
+
+The priority metric is initialized by the ``amd-pstate`` driver, which also
+determines whether ``amd-pstate`` preferred core is supported by the
+platform.
+
+The ``amd-pstate`` driver provides an initial core ordering when the system boots.
+The platform uses the CPPC interfaces to communicate the core ranking to the
+operating system and the scheduler, so that the OS schedules processes on the
+highest-performance cores first. When the ``amd-pstate`` driver receives a
+notification that a core's highest performance value has changed, it updates
+the core ranking and the CPU's priority.
+
+``amd-pstate`` Preferred Core Switch
+=====================================
+Kernel Parameters
+-----------------
+
+``amd-pstate`` preferred core has two states: enabled and disabled.
+The state can be selected with a kernel parameter; the default is
+enabled.
+
+``amd_prefcore=disable``
+
+For systems that support ``amd-pstate`` preferred core, the core rankings are
+always advertised by the platform, but the OS can choose to ignore them via
+the kernel parameter ``amd_prefcore=disable``.
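As a quick check (illustrative only; it assumes the driver's global attributes are
exposed under the usual ``/sys/devices/system/cpu/amd_pstate/`` directory described
in the sysfs section below), the effect of the parameter can be verified at runtime::

	# boot with: ... amd_prefcore=disable ...
	$ cat /sys/devices/system/cpu/amd_pstate/prefcore
	disabled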
+
 User Space Interface in ``sysfs`` - General
 ===========================================
 
@@ -385,6 +427,30 @@ control its functionality at the system level. They are located in the
 	to the operation mode represented by that string - or to be
 	unregistered in the "disable" case.
 
+``prefcore``
+	Preferred core state of the driver: "enabled" or "disabled".
+
+	"enabled"
+		Enable the ``amd-pstate`` preferred core.
+
+	"disabled"
+		Disable the ``amd-pstate`` preferred core.
+
+	This attribute is read-only; it reports the preferred core state
+	selected by the kernel parameter.
+
+``cpb_boost``
+	Specifies whether core performance boost is requested to be enabled or disabled.
+	If core performance boost is disabled while a core is in a boosted P-state, the
+	core automatically transitions to the highest performance non-boosted P-state.
+	AMD Core Performance Boost (CPB) is controlled by this attribute file, which
+	allows the user to change the frequency boosting state of all cores. It supports
+	``active``, ``passive`` and ``guided`` mode control; write one of the values
+	below to it.
+
+	"0" Disable Core Performance Boosting
+	"1" Enable Core Performance Boosting
+
 ``cpupower`` tool support for ``amd-pstate``
 ===============================================
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6f49999a6b83..67f5f5647fbe 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1055,8 +1055,9 @@ config SCHED_MC
 
 config SCHED_MC_PRIO
 	bool "CPU core priorities scheduler support"
-	depends on SCHED_MC && CPU_SUP_INTEL
-	select X86_INTEL_PSTATE
+	depends on SCHED_MC
+	select X86_INTEL_PSTATE if CPU_SUP_INTEL
+	select X86_AMD_PSTATE if CPU_SUP_AMD && ACPI
 	select CPU_FREQ
 	default y
 	help
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index d65f8ce6b7cf..f0f4c137ce1e 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -751,6 +751,8 @@
 #define MSR_K7_HWCR_IRPERF_EN	BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
 #define MSR_K7_FID_VID_CTL	0xc0010041
 #define MSR_K7_FID_VID_STATUS	0xc0010042
+#define MSR_K7_HWCR_CPB_DIS_BIT	25
+#define MSR_K7_HWCR_CPB_DIS	BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT)
 
 /* K6 MSRs */
 #define MSR_K6_WHCR	0xc0000082
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 1b27ebc6d01d..1d857978f5f4 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -686,8 +686,10 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
 
 	if (!osc_sb_cppc2_support_acked) {
 		pr_debug("CPPC v2 _OSC not acked\n");
-		if (!cpc_supported_by_cpu())
+		if (!cpc_supported_by_cpu()) {
+			pr_debug("CPPC is not supported by the CPU\n");
 			return -ENODEV;
+		}
 	}
 
 	/* Parse the ACPI _CPC table for this CPU. */
@@ -1199,6 +1201,19 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
 	return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf);
 }
 
+/**
+ * cppc_get_highest_perf - Get the highest performance register value.
+ * @cpunum: CPU from which to get highest performance.
+ * @highest_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
+{
+	return cppc_get_perf(cpunum, HIGHEST_PERF, highest_perf);
+}
+EXPORT_SYMBOL_GPL(cppc_get_highest_perf);
+
 /**
  * cppc_get_epp_perf - Get the epp register value.
  * @cpunum: CPU from which to get epp preference value.
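The new cppc_get_highest_perf() helper exported above is consumed by the amd-pstate
driver later in this series. As a rough usage sketch (hypothetical function name;
assumes the x86 ITMT interface declared in <asm/topology.h>), a cpufreq driver could
turn the CPPC highest-perf value into a scheduler core priority like this:

	#include <acpi/cppc_acpi.h>
	#include <asm/topology.h>

	/* Illustrative only: read the CPPC highest-perf value for @cpu and
	 * feed it to the scheduler as an ITMT core priority.
	 */
	static int example_set_core_priority(int cpu)
	{
		u64 highest_perf;
		int ret;

		ret = cppc_get_highest_perf(cpu, &highest_perf);
		if (ret)
			return ret;	/* stub returns -ENOTSUPP without CONFIG_ACPI_CPPC_LIB */

		/* A larger highest_perf value indicates a preferred ("faster")
		 * core, so it can be used directly as the priority.
		 */
		sched_set_itmt_core_prio((int)highest_perf, cpu);

		return 0;
	}

The driver itself additionally treats a value at or above 255 (U8_MAX) as "no
preferred core support", as can be seen in amd_pstate_init_prefcore() further down.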
diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 4bd16b3f0781..67db60eda370 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -27,6 +27,7 @@ #define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80 #define ACPI_PROCESSOR_NOTIFY_POWER 0x81 #define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82 +#define ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED 0x85 MODULE_AUTHOR("Paul Diefenbaugh"); MODULE_DESCRIPTION("ACPI Processor Driver"); @@ -83,6 +84,11 @@ static void acpi_processor_notify(acpi_handle handle, u32 event, void *data) acpi_bus_generate_netlink_event(device->pnp.device_class, dev_name(&device->dev), event, 0); break; + case ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED: + cpufreq_update_limits(pr->id); + acpi_bus_generate_netlink_event(device->pnp.device_class, + dev_name(&device->dev), event, 0); + break; default: acpi_handle_debug(handle, "Unsupported event [0x%x]\n", event); break; diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 37f1cdf46d29..2fc82831bddd 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -50,8 +50,6 @@ enum { #define AMD_MSR_RANGE (0x7) #define HYGON_MSR_RANGE (0x7) -#define MSR_K7_HWCR_CPB_DIS (1ULL << 25) - struct acpi_cpufreq_data { unsigned int resume; unsigned int cpu_feature; diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index f04ae67dda37..b3601b0e6dd3 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -226,7 +226,7 @@ static void amd_pstate_ut_check_freq(u32 index) goto skip_test; } - if (cpudata->boost_supported) { + if (amd_pstate_global_params.cpb_boost) { if ((policy->max == cpudata->max_freq) || (policy->max == cpudata->nominal_freq)) amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 07f341995439..56270dc7d958 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -64,6 +65,10 @@ static struct cpufreq_driver amd_pstate_driver; static struct cpufreq_driver amd_pstate_epp_driver; static int cppc_state = AMD_PSTATE_UNDEFINED; static bool cppc_enabled; +static bool amd_pstate_prefcore = true; +static struct quirk_entry *quirks; +struct amd_pstate_global_params amd_pstate_global_params; +EXPORT_SYMBOL_GPL(amd_pstate_global_params); /* * AMD Energy Preference Performance (EPP) @@ -108,6 +113,41 @@ static unsigned int epp_values[] = { typedef int (*cppc_mode_transition_fn)(int); +static struct quirk_entry quirk_amd_7k62 = { + .nominal_freq = 2600, + .lowest_freq = 550, +}; + +static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi) +{ + /** + * match the broken bios for family 17h processor support CPPC V2 + * broken BIOS lack of nominal_freq and lowest_freq capabilities + * definition in ACPI tables + */ + if (boot_cpu_has(X86_FEATURE_ZEN2)) { + quirks = dmi->driver_data; + pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident); + return 1; + } + + return 0; +} + +static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = { + { + .callback = dmi_matched_7k62_bios_bug, + .ident = "AMD EPYC 7K62", + .matches = { + DMI_MATCH(DMI_BIOS_VERSION, "5.14"), + DMI_MATCH(DMI_BIOS_RELEASE, "12/12/2019"), + }, + .driver_data = &quirk_amd_7k62, + }, + {} +}; +MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table); + static inline int 
get_mode_idx_from_str(const char *str, size_t size) { int i; @@ -297,10 +337,8 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) if (ret) return ret; - /* - * TODO: Introduce AMD specific power feature. - * - * CPPC entry doesn't indicate the highest performance in some ASICs. + /* Some CPUs have different highest_perf from others, it is safer + * to read it than to assume some erroneous value, leading to performance issues. */ highest_perf = amd_get_highest_perf(); if (highest_perf > AMD_CPPC_HIGHEST_PERF(cap1)) @@ -311,6 +349,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); + WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1)); WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1)); return 0; } @@ -324,6 +363,9 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) if (ret) return ret; + /* Some CPUs have different highest_perf from others, it is safer + * to read it than to assume some erroneous value, leading to performance issues. + */ highest_perf = amd_get_highest_perf(); if (highest_perf > cppc_perf.highest_perf) highest_perf = cppc_perf.highest_perf; @@ -334,6 +376,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) WRITE_ONCE(cpudata->lowest_nonlinear_perf, cppc_perf.lowest_nonlinear_perf); WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); + WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf); WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf); if (cppc_state == AMD_PSTATE_ACTIVE) @@ -430,7 +473,10 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags) { + unsigned long max_freq; + struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu); u64 prev = READ_ONCE(cpudata->cppc_req_cached); + u32 nominal_perf = READ_ONCE(cpudata->nominal_perf); u64 value = prev; min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, @@ -439,6 +485,9 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, cpudata->max_limit_perf); des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); + max_freq = READ_ONCE(cpudata->max_limit_freq); + policy->cur = div_u64(des_perf * max_freq, max_perf); + if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { min_perf = des_perf; des_perf = 0; @@ -450,6 +499,10 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, value &= ~AMD_CPPC_DES_PERF(~0L); value |= AMD_CPPC_DES_PERF(des_perf); + /* limit the max perf when core performance boost feature is disabled */ + if (!amd_pstate_global_params.cpb_boost) + max_perf = min_t(unsigned long, nominal_perf, max_perf); + value &= ~AMD_CPPC_MAX_PERF(~0L); value |= AMD_CPPC_MAX_PERF(max_perf); @@ -477,12 +530,19 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy) static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) { - u32 max_limit_perf, min_limit_perf; + u32 max_limit_perf, min_limit_perf, lowest_perf; struct amd_cpudata *cpudata = policy->driver_data; max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); + lowest_perf = READ_ONCE(cpudata->lowest_perf); + if 
(min_limit_perf < lowest_perf) + min_limit_perf = lowest_perf; + + if (max_limit_perf < min_limit_perf) + max_limit_perf = min_limit_perf; + WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); WRITE_ONCE(cpudata->max_limit_freq, policy->max); @@ -553,10 +613,9 @@ static void amd_pstate_adjust_perf(unsigned int cpu, unsigned long capacity) { unsigned long max_perf, min_perf, des_perf, - cap_perf, lowest_nonlinear_perf, max_freq; + cap_perf, lowest_nonlinear_perf; struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); struct amd_cpudata *cpudata = policy->driver_data; - unsigned int target_freq; if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) amd_pstate_update_min_max_limit(policy); @@ -564,7 +623,6 @@ static void amd_pstate_adjust_perf(unsigned int cpu, cap_perf = READ_ONCE(cpudata->highest_perf); lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); - max_freq = READ_ONCE(cpudata->max_freq); des_perf = cap_perf; if (target_perf < capacity) @@ -582,133 +640,264 @@ static void amd_pstate_adjust_perf(unsigned int cpu, max_perf = min_perf; des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); - target_freq = div_u64(des_perf * max_freq, max_perf); - policy->cur = target_freq; amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true, policy->governor->flags); cpufreq_cpu_put(policy); } -static int amd_get_min_freq(struct amd_cpudata *cpudata) +static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) { - struct cppc_perf_caps cppc_perf; + struct amd_cpudata *cpudata = policy->driver_data; + int ret; - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) + if (!cpudata->boost_supported) { + pr_err("Boost mode is not supported by this processor or SBIOS\n"); + return -ENOTSUPP; + } + + if (state) + policy->cpuinfo.max_freq = cpudata->max_freq; + else + policy->cpuinfo.max_freq = cpudata->nominal_freq; + + policy->max = policy->cpuinfo.max_freq; + + ret = freq_qos_update_request(&cpudata->req[1], + policy->cpuinfo.max_freq); + if (ret < 0) return ret; - /* Switch to khz */ - return cppc_perf.lowest_freq * 1000; + return 0; } -static int amd_get_max_freq(struct amd_cpudata *cpudata) +static int amd_pstate_boost_init(struct amd_cpudata *cpudata) { - struct cppc_perf_caps cppc_perf; - u32 max_perf, max_freq, nominal_freq, nominal_perf; - u64 boost_ratio; + u64 boost_val; + int ret; - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) + if (!boot_cpu_has(X86_FEATURE_CPB)) { + cpudata->boost_supported = false; + current_pstate_driver->boost_enabled = false; + pr_debug_once("Boost CPB capabilities not present in the processor\n"); + return -ENOTSUPP; + } + + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val); + if (ret) { + pr_err_once("failed to read initial CPU boost state!\n"); return ret; + } - nominal_freq = cppc_perf.nominal_freq; - nominal_perf = READ_ONCE(cpudata->nominal_perf); - max_perf = READ_ONCE(cpudata->highest_perf); + amd_pstate_global_params.cpb_supported = !(boost_val & MSR_K7_HWCR_CPB_DIS); + if (amd_pstate_global_params.cpb_supported) + current_pstate_driver->boost_enabled = true; - boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT, - nominal_perf); + amd_pstate_global_params.cpb_boost = amd_pstate_global_params.cpb_supported; - max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT; + return ret; +} - /* Switch to khz */ - return max_freq * 1000; +static void amd_perf_ctl_reset(unsigned int cpu) +{ 
+ wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); } -static int amd_get_nominal_freq(struct amd_cpudata *cpudata) +/* + * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks + * due to locking, so queue the work for later. + */ +static void amd_pstste_sched_prefcore_workfn(struct work_struct *work) { - struct cppc_perf_caps cppc_perf; + sched_set_itmt_support(); +} +static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn); - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) - return ret; +/* + * Get the highest performance register value. + * @cpu: CPU from which to get highest performance. + * @highest_perf: Return address. + * + * Return: 0 for success, -EIO otherwise. + */ +static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf) +{ + int ret; + + if (boot_cpu_has(X86_FEATURE_CPPC)) { + u64 cap1; - /* Switch to khz */ - return cppc_perf.nominal_freq * 1000; + ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1); + if (ret) + return ret; + WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1)); + } else { + u64 cppc_highest_perf; + + ret = cppc_get_highest_perf(cpu, &cppc_highest_perf); + if (ret) + return ret; + WRITE_ONCE(*highest_perf, cppc_highest_perf); + } + + return (ret); } -static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata) +#define CPPC_MAX_PERF U8_MAX + +static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) { - struct cppc_perf_caps cppc_perf; - u32 lowest_nonlinear_freq, lowest_nonlinear_perf, - nominal_freq, nominal_perf; - u64 lowest_nonlinear_ratio; + int ret, prio; + u32 highest_perf; - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); + ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf); if (ret) - return ret; - - nominal_freq = cppc_perf.nominal_freq; - nominal_perf = READ_ONCE(cpudata->nominal_perf); + return; - lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; + cpudata->hw_prefcore = true; + /* check if CPPC preferred core feature is enabled*/ + if (highest_perf < CPPC_MAX_PERF) + prio = (int)highest_perf; + else { + pr_debug("AMD CPPC preferred core is unsupported!\n"); + cpudata->hw_prefcore = false; + return; + } - lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT, - nominal_perf); + if (!amd_pstate_prefcore) + return; - lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT; + /* + * The priorities can be set regardless of whether or not + * sched_set_itmt_support(true) has been called and it is valid to + * update them at any time after it has been called. 
+ */ + sched_set_itmt_core_prio(prio, cpudata->cpu); - /* Switch to khz */ - return lowest_nonlinear_freq * 1000; + schedule_work(&sched_prefcore_work); } -static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) +static void amd_pstate_update_limits(unsigned int cpu) { + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); struct amd_cpudata *cpudata = policy->driver_data; + u32 prev_high = 0, cur_high = 0; int ret; + bool highest_perf_changed = false; - if (!cpudata->boost_supported) { - pr_err("Boost mode is not supported by this processor or SBIOS\n"); - return -EINVAL; - } + mutex_lock(&amd_pstate_driver_lock); + if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore)) + goto free_cpufreq_put; - if (state) - policy->cpuinfo.max_freq = cpudata->max_freq; - else - policy->cpuinfo.max_freq = cpudata->nominal_freq; + ret = amd_pstate_get_highest_perf(cpu, &cur_high); + if (ret) + goto free_cpufreq_put; - policy->max = policy->cpuinfo.max_freq; + prev_high = READ_ONCE(cpudata->prefcore_ranking); + if (prev_high != cur_high) { + highest_perf_changed = true; + WRITE_ONCE(cpudata->prefcore_ranking, cur_high); - ret = freq_qos_update_request(&cpudata->req[1], - policy->cpuinfo.max_freq); - if (ret < 0) - return ret; + if (cur_high < CPPC_MAX_PERF) + sched_set_itmt_core_prio((int)cur_high, cpu); + } - return 0; +free_cpufreq_put: + cpufreq_cpu_put(policy); + + if (!highest_perf_changed) + cpufreq_update_policy(cpu); + + mutex_unlock(&amd_pstate_driver_lock); } -static void amd_pstate_boost_init(struct amd_cpudata *cpudata) +/* + * Get pstate transition delay time from ACPI tables that firmware set + * instead of using hardcode value directly. + */ +static u32 amd_pstate_get_transition_delay_us(unsigned int cpu) { - u32 highest_perf, nominal_perf; + u32 transition_delay_ns; - highest_perf = READ_ONCE(cpudata->highest_perf); - nominal_perf = READ_ONCE(cpudata->nominal_perf); + transition_delay_ns = cppc_get_transition_latency(cpu); + if (transition_delay_ns == CPUFREQ_ETERNAL) + return AMD_PSTATE_TRANSITION_DELAY; - if (highest_perf <= nominal_perf) - return; + return transition_delay_ns / NSEC_PER_USEC; +} + +/* + * Get pstate transition latency value from ACPI tables that firmware + * set instead of using hardcode value directly. + */ +static u32 amd_pstate_get_transition_latency(unsigned int cpu) +{ + u32 transition_latency; - cpudata->boost_supported = true; - current_pstate_driver->boost_enabled = true; + transition_latency = cppc_get_transition_latency(cpu); + if (transition_latency == CPUFREQ_ETERNAL) + return AMD_PSTATE_TRANSITION_LATENCY; + + return transition_latency; } -static void amd_perf_ctl_reset(unsigned int cpu) +/* + * amd_pstate_init_freq: Initialize the max_freq, min_freq, + * nominal_freq and lowest_nonlinear_freq for + * the @cpudata object. + * + * Requires: highest_perf, lowest_perf, nominal_perf and + * lowest_nonlinear_perf members of @cpudata to be + * initialized. + * + * Returns 0 on success, non-zero value on failure. 
+ */ +static int amd_pstate_init_freq(struct amd_cpudata *cpudata) { - wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); + int ret; + u32 min_freq; + u32 highest_perf, max_freq; + u32 nominal_perf, nominal_freq; + u32 lowest_nonlinear_perf, lowest_nonlinear_freq; + u32 boost_ratio, lowest_nonlinear_ratio; + struct cppc_perf_caps cppc_perf; + + ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); + if (ret) + return ret; + + if (quirks && quirks->lowest_freq) + min_freq = quirks->lowest_freq * 1000; + else + min_freq = cppc_perf.lowest_freq * 1000; + + if (quirks && quirks->nominal_freq) + nominal_freq = quirks->nominal_freq ; + else + nominal_freq = cppc_perf.nominal_freq; + + nominal_perf = READ_ONCE(cpudata->nominal_perf); + + highest_perf = READ_ONCE(cpudata->highest_perf); + boost_ratio = div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf); + max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000; + + lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); + lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT, + nominal_perf); + lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT) * 1000; + + WRITE_ONCE(cpudata->min_freq, min_freq); + WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq); + WRITE_ONCE(cpudata->nominal_freq, nominal_freq); + WRITE_ONCE(cpudata->max_freq, max_freq); + + return 0; } static int amd_pstate_cpu_init(struct cpufreq_policy *policy) { - int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; + int min_freq, max_freq, nominal_freq, ret; struct device *dev; struct amd_cpudata *cpudata; @@ -727,24 +916,36 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) cpudata->cpu = policy->cpu; + amd_pstate_init_prefcore(cpudata); + + /* initialize cpu cores boot state */ + ret = amd_pstate_boost_init(cpudata); + if (ret) + goto free_cpudata1; + ret = amd_pstate_init_perf(cpudata); if (ret) goto free_cpudata1; - min_freq = amd_get_min_freq(cpudata); - max_freq = amd_get_max_freq(cpudata); - nominal_freq = amd_get_nominal_freq(cpudata); - lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); + ret = amd_pstate_init_freq(cpudata); + if (ret) + goto free_cpudata1; + + min_freq = READ_ONCE(cpudata->min_freq); + max_freq = READ_ONCE(cpudata->max_freq); + nominal_freq = READ_ONCE(cpudata->nominal_freq); - if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { - dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", - min_freq, max_freq); + if (min_freq <= 0 || max_freq <= 0 || + nominal_freq <= 0 || min_freq > max_freq) { + dev_err(dev, + "min_freq(%d) or max_freq(%d) or nominal_freq (%d) value is incorrect, check _CPC in ACPI tables\n", + min_freq, max_freq, nominal_freq); ret = -EINVAL; goto free_cpudata1; } - policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY; - policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY; + policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu); + policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu); policy->min = min_freq; policy->max = max_freq; @@ -772,17 +973,11 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) goto free_cpudata2; } - /* Initial processor data capability frequencies */ - cpudata->max_freq = max_freq; - cpudata->min_freq = min_freq; cpudata->max_limit_freq = max_freq; cpudata->min_limit_freq = min_freq; - cpudata->nominal_freq = nominal_freq; - cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; 
policy->driver_data = cpudata; - amd_pstate_boost_init(cpudata); if (!current_pstate_driver->adjust_perf) current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; @@ -842,7 +1037,7 @@ static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, int max_freq; struct amd_cpudata *cpudata = policy->driver_data; - max_freq = amd_get_max_freq(cpudata); + max_freq = READ_ONCE(cpudata->max_freq); if (max_freq < 0) return max_freq; @@ -855,7 +1050,7 @@ static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *poli int freq; struct amd_cpudata *cpudata = policy->driver_data; - freq = amd_get_lowest_nonlinear_freq(cpudata); + freq = READ_ONCE(cpudata->lowest_nonlinear_freq); if (freq < 0) return freq; @@ -877,6 +1072,28 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, return sysfs_emit(buf, "%u\n", perf); } +static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy, + char *buf) +{ + u32 perf; + struct amd_cpudata *cpudata = policy->driver_data; + + perf = READ_ONCE(cpudata->prefcore_ranking); + + return sysfs_emit(buf, "%u\n", perf); +} + +static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy, + char *buf) +{ + bool hw_prefcore; + struct amd_cpudata *cpudata = policy->driver_data; + + hw_prefcore = READ_ONCE(cpudata->hw_prefcore); + + return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore)); +} + static ssize_t show_energy_performance_available_preferences( struct cpufreq_policy *policy, char *buf) { @@ -1074,18 +1291,127 @@ static ssize_t status_store(struct device *a, struct device_attribute *b, return ret < 0 ? ret : count; } +static ssize_t prefcore_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore)); +} + +static int amd_cpu_boost_update(struct amd_cpudata *cpudata, u32 on) +{ + struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu); + struct cppc_perf_ctrls perf_ctrls; + u32 highest_perf, nominal_perf, nominal_freq, max_freq; + int ret; + + if (!policy) + return -ENODATA; + + highest_perf = READ_ONCE(cpudata->highest_perf); + nominal_perf = READ_ONCE(cpudata->nominal_perf); + nominal_freq = READ_ONCE(cpudata->nominal_freq); + max_freq = READ_ONCE(cpudata->max_freq); + + if (boot_cpu_has(X86_FEATURE_CPPC)) { + u64 value = READ_ONCE(cpudata->cppc_req_cached); + + value &= ~GENMASK_ULL(7, 0); + value |= on ? highest_perf : nominal_perf; + WRITE_ONCE(cpudata->cppc_req_cached, value); + + wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + } else { + perf_ctrls.max_perf = on ? 
highest_perf : nominal_perf; + ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); + if (ret) { + cpufreq_cpu_release(policy); + pr_debug("failed to set energy perf value (%d)\n", ret); + return ret; + } + } + + if (on) + policy->cpuinfo.max_freq = max_freq; + else + policy->cpuinfo.max_freq = nominal_freq * 1000; + + policy->max = policy->cpuinfo.max_freq; + + if (cppc_state == AMD_PSTATE_PASSIVE) { + ret = freq_qos_update_request(&cpudata->req[1], + policy->cpuinfo.max_freq); + } + + cpufreq_cpu_release(policy); + + return ret; +} + +static ssize_t cpb_boost_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%u\n", amd_pstate_global_params.cpb_boost); +} + +static ssize_t cpb_boost_store(struct device *dev, struct device_attribute *b, + const char *buf, size_t count) +{ + bool new_state; + ssize_t ret; + int cpu; + + if (!amd_pstate_global_params.cpb_supported) { + pr_err("Boost mode is not supported by this processor or SBIOS\n"); + return -EINVAL; + } + + ret = kstrtobool(buf, &new_state); + if (ret) + return ret; + + mutex_lock(&amd_pstate_driver_lock); + amd_pstate_global_params.cpb_boost = !!new_state; + + for_each_present_cpu(cpu) { + + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct amd_cpudata *cpudata = policy->driver_data; + + if (!cpudata) { + pr_err("cpudata is NULL\n"); + ret = -ENODATA; + cpufreq_cpu_put(policy); + goto err_exit; + } + + amd_cpu_boost_update(cpudata, amd_pstate_global_params.cpb_boost); + refresh_frequency_limits(policy); + cpufreq_cpu_put(policy); + } + +err_exit: + mutex_unlock(&amd_pstate_driver_lock); + return ret < 0 ? ret : count; +} + cpufreq_freq_attr_ro(amd_pstate_max_freq); cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); cpufreq_freq_attr_ro(amd_pstate_highest_perf); +cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking); +cpufreq_freq_attr_ro(amd_pstate_hw_prefcore); cpufreq_freq_attr_rw(energy_performance_preference); cpufreq_freq_attr_ro(energy_performance_available_preferences); static DEVICE_ATTR_RW(status); +static DEVICE_ATTR_RO(prefcore); +static DEVICE_ATTR_RW(cpb_boost); static struct freq_attr *amd_pstate_attr[] = { &amd_pstate_max_freq, &amd_pstate_lowest_nonlinear_freq, &amd_pstate_highest_perf, + &amd_pstate_prefcore_ranking, + &amd_pstate_hw_prefcore, NULL, }; @@ -1093,6 +1419,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = { &amd_pstate_max_freq, &amd_pstate_lowest_nonlinear_freq, &amd_pstate_highest_perf, + &amd_pstate_prefcore_ranking, + &amd_pstate_hw_prefcore, &energy_performance_preference, &energy_performance_available_preferences, NULL, @@ -1100,6 +1428,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = { static struct attribute *pstate_global_attributes[] = { &dev_attr_status.attr, + &dev_attr_prefcore.attr, + &dev_attr_cpb_boost.attr, NULL }; @@ -1130,7 +1460,7 @@ static bool amd_pstate_acpi_pm_profile_undefined(void) static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) { - int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; + int min_freq, max_freq, nominal_freq, ret; struct amd_cpudata *cpudata; struct device *dev; u64 value; @@ -1151,17 +1481,29 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) cpudata->cpu = policy->cpu; cpudata->epp_policy = 0; + amd_pstate_init_prefcore(cpudata); + + /* initialize cpu cores boot state */ + ret = amd_pstate_boost_init(cpudata); + if (ret) + goto free_cpudata1; + ret = amd_pstate_init_perf(cpudata); if (ret) goto free_cpudata1; - min_freq = 
amd_get_min_freq(cpudata); - max_freq = amd_get_max_freq(cpudata); - nominal_freq = amd_get_nominal_freq(cpudata); - lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); - if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { - dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", - min_freq, max_freq); + ret = amd_pstate_init_freq(cpudata); + if (ret) + goto free_cpudata1; + + min_freq = READ_ONCE(cpudata->min_freq); + max_freq = READ_ONCE(cpudata->max_freq); + nominal_freq = READ_ONCE(cpudata->nominal_freq); + if (min_freq <= 0 || max_freq <= 0 || + nominal_freq <= 0 || min_freq > max_freq) { + dev_err(dev, + "min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect, check _CPC in ACPI tables\n", + min_freq, max_freq, nominal_freq); ret = -EINVAL; goto free_cpudata1; } @@ -1171,12 +1513,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) /* It will be updated by governor */ policy->cur = policy->cpuinfo.min_freq; - /* Initial processor data capability frequencies */ - cpudata->max_freq = max_freq; - cpudata->min_freq = min_freq; - cpudata->nominal_freq = nominal_freq; - cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; - policy->driver_data = cpudata; cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0); @@ -1205,7 +1541,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) return ret; WRITE_ONCE(cpudata->cppc_cap1_cached, value); } - amd_pstate_boost_init(cpudata); return 0; @@ -1232,6 +1567,12 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); + if (min_limit_perf < min_perf) + min_limit_perf = min_perf; + + if (max_limit_perf < min_limit_perf) + max_limit_perf = min_limit_perf; + WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); @@ -1294,6 +1635,12 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) amd_pstate_epp_update_limit(policy); + /* + * policy->cur is never updated with the amd_pstate_epp driver, but it + * is used as a stale frequency value. So, keep it within limits. 
+ */ + policy->cur = policy->min; + return 0; } @@ -1432,6 +1779,7 @@ static struct cpufreq_driver amd_pstate_driver = { .suspend = amd_pstate_cpu_suspend, .resume = amd_pstate_cpu_resume, .set_boost = amd_pstate_set_boost, + .update_limits = amd_pstate_update_limits, .name = "amd-pstate", .attr = amd_pstate_attr, }; @@ -1446,6 +1794,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = { .online = amd_pstate_epp_cpu_online, .suspend = amd_pstate_epp_suspend, .resume = amd_pstate_epp_resume, + .update_limits = amd_pstate_update_limits, .name = "amd-pstate-epp", .attr = amd_pstate_epp_attr, }; @@ -1486,6 +1835,11 @@ static int __init amd_pstate_init(void) if (cpufreq_get_current_driver()) return -EEXIST; + quirks = NULL; + + /* check if this machine need CPPC quirks */ + dmi_check_system(amd_pstate_quirks_table); + switch (cppc_state) { case AMD_PSTATE_UNDEFINED: /* Disable on the following configs by default: @@ -1567,7 +1921,17 @@ static int __init amd_pstate_param(char *str) return amd_pstate_set_driver(mode_idx); } + +static int __init amd_prefcore_param(char *str) +{ + if (!strcmp(str, "disable")) + amd_pstate_prefcore = false; + + return 0; +} + early_param("amd_pstate", amd_pstate_param); +early_param("amd_prefcore", amd_prefcore_param); MODULE_AUTHOR("Huang Rui "); MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 3a0995f8bce8..930b6afba6f4 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -139,6 +139,7 @@ struct cppc_cpudata { #ifdef CONFIG_ACPI_CPPC_LIB extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf); extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); +extern int cppc_get_highest_perf(int cpunum, u64 *highest_perf); extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_set_enable(int cpu, bool enable); @@ -167,6 +168,10 @@ static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) { return -ENOTSUPP; } +static inline int cppc_get_highest_perf(int cpunum, u64 *highest_perf) +{ + return -ENOTSUPP; +} static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs) { return -ENOTSUPP; diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index 6ad02ad9c7b4..8ba5dd4d3405 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -39,19 +39,31 @@ struct amd_aperf_mperf { * @cppc_req_cached: cached performance request hints * @highest_perf: the maximum performance an individual processor may reach, * assuming ideal conditions + * For platforms that do not support the preferred core feature, the + * highest_pef may be configured with 166 or 255, to avoid max frequency + * calculated wrongly. we take the fixed value as the highest_perf. * @nominal_perf: the maximum sustained performance level of the processor, * assuming ideal operating conditions * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power * savings are achieved * @lowest_perf: the absolute lowest performance level of the processor - * @max_freq: the frequency that mapped to highest_perf - * @min_freq: the frequency that mapped to lowest_perf - * @nominal_freq: the frequency that mapped to nominal_perf - * @lowest_nonlinear_freq: the frequency that mapped to lowest_nonlinear_perf + * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher + * priority. 
+ * @min_limit_perf: Cached value of the performance corresponding to policy->min + * @max_limit_perf: Cached value of the performance corresponding to policy->max + * @min_limit_freq: Cached value of policy->min (in khz) + * @max_limit_freq: Cached value of policy->max (in khz) + * @max_freq: the frequency (in khz) that mapped to highest_perf + * @min_freq: the frequency (in khz) that mapped to lowest_perf + * @nominal_freq: the frequency (in khz) that mapped to nominal_perf + * @lowest_nonlinear_freq: the frequency (in khz) that mapped to lowest_nonlinear_perf * @cur: Difference of Aperf/Mperf/tsc count between last and current sample * @prev: Last Aperf/Mperf/tsc count value read from register - * @freq: current cpu frequency value + * @freq: current cpu frequency value (in khz) * @boost_supported: check whether the Processor or SBIOS supports boost mode + * @hw_prefcore: check whether HW supports preferred core featue. + * Only when hw_prefcore and early prefcore param are true, + * AMD P-State driver supports preferred core featue. * @epp_policy: Last saved policy used to set energy-performance preference * @epp_cached: Cached CPPC energy-performance preference value * @policy: Cpufreq policy value @@ -70,6 +82,7 @@ struct amd_cpudata { u32 nominal_perf; u32 lowest_nonlinear_perf; u32 lowest_perf; + u32 prefcore_ranking; u32 min_limit_perf; u32 max_limit_perf; u32 min_limit_freq; @@ -85,6 +98,7 @@ struct amd_cpudata { u64 freq; bool boost_supported; + bool hw_prefcore; /* EPP feature related attributes*/ s16 epp_policy; @@ -114,4 +128,23 @@ static const char * const amd_pstate_mode_string[] = { [AMD_PSTATE_GUIDED] = "guided", NULL, }; + +struct quirk_entry { + u32 nominal_freq; + u32 lowest_freq; +}; + +/** + * struct amd_pstate_global_params - Global parameters, mostly tunable via sysfs. + * @cpb_boost: Whether or not to use boost CPU P-states. 
+ * @cpb_supported: Whether or not CPU boost P-states are available + * based on the MSR_K7_HWCR bit[25] state + */ +struct amd_pstate_global_params { + bool cpb_boost; + bool cpb_supported; +}; + +extern struct amd_pstate_global_params amd_pstate_global_params; + #endif /* _LINUX_AMD_PSTATE_H */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 320fab7d2e94..3129411fa978 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -263,6 +263,7 @@ static inline bool cpufreq_supports_freq_invariance(void) return false; } static inline void disable_cpufreq(void) { } +static inline void cpufreq_update_limits(unsigned int cpu) { } #endif #ifdef CONFIG_CPU_FREQ_STAT -- 2.45.0 From f28f675b4a043a6dd9c5370a115b68b4c3f77f18 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Wed, 10 Apr 2024 17:59:09 +0200 Subject: [PATCH 3/9] bbr3 Signed-off-by: Peter Jung --- include/linux/tcp.h | 4 +- include/net/inet_connection_sock.h | 4 +- include/net/tcp.h | 72 +- include/uapi/linux/inet_diag.h | 23 + include/uapi/linux/rtnetlink.h | 4 +- include/uapi/linux/tcp.h | 1 + net/ipv4/Kconfig | 21 +- net/ipv4/bpf_tcp_ca.c | 9 +- net/ipv4/tcp.c | 3 + net/ipv4/tcp_bbr.c | 2230 +++++++++++++++++++++------- net/ipv4/tcp_cong.c | 1 + net/ipv4/tcp_input.c | 40 +- net/ipv4/tcp_minisocks.c | 2 + net/ipv4/tcp_output.c | 48 +- net/ipv4/tcp_rate.c | 30 +- net/ipv4/tcp_timer.c | 1 + 16 files changed, 1940 insertions(+), 553 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a1c47a6d69b0..9e63e5580dc5 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -369,7 +369,9 @@ struct tcp_sock { u8 compressed_ack; u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ - unused:5; + fast_ack_mode:2, /* which fast ack mode ? */ + tlp_orig_data_app_limited:1, /* app-limited before TLP rtx? 
*/ + unused:2; u8 thin_lto : 1,/* Use linear timeouts for thin streams */ recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index ccf171f7eb60..400a226d6bea 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -137,8 +137,8 @@ struct inet_connection_sock { u32 icsk_probes_tstamp; u32 icsk_user_timeout; - u64 icsk_ca_priv[104 / sizeof(u64)]; -#define ICSK_CA_PRIV_SIZE sizeof_field(struct inet_connection_sock, icsk_ca_priv) +#define ICSK_CA_PRIV_SIZE (144) + u64 icsk_ca_priv[ICSK_CA_PRIV_SIZE / sizeof(u64)]; }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ diff --git a/include/net/tcp.h b/include/net/tcp.h index f6eba9652d01..3998a5f145ad 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -381,6 +381,8 @@ static inline void tcp_dec_quickack_mode(struct sock *sk) #define TCP_ECN_QUEUE_CWR 2 #define TCP_ECN_DEMAND_CWR 4 #define TCP_ECN_SEEN 8 +#define TCP_ECN_LOW 16 +#define TCP_ECN_ECT_PERMANENT 32 enum tcp_tw_status { TCP_TW_SUCCESS = 0, @@ -737,6 +739,15 @@ static inline void tcp_fast_path_check(struct sock *sk) u32 tcp_delack_max(const struct sock *sk); +static inline void tcp_set_ecn_low_from_dst(struct sock *sk, + const struct dst_entry *dst) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (dst_feature(dst, RTAX_FEATURE_ECN_LOW)) + tp->ecn_flags |= TCP_ECN_LOW; +} + /* Compute the actual rto_min value */ static inline u32 tcp_rto_min(const struct sock *sk) { @@ -842,6 +853,11 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) return max_t(s64, t1 - t0, 0); } +static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0) +{ + return max_t(s32, t1 - t0, 0); +} + /* provide the departure time in us unit */ static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb) { @@ -930,9 +946,14 @@ struct tcp_skb_cb { /* pkts S/ACKed so far upon tx of skb, incl retrans: */ __u32 delivered; /* start of send pipeline phase */ - u64 first_tx_mstamp; + u32 first_tx_mstamp; /* when we reached the "delivered" count */ - u64 delivered_mstamp; + u32 delivered_mstamp; +#define TCPCB_IN_FLIGHT_BITS 20 +#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1) + u32 in_flight:20, /* packets in flight at transmit */ + unused2:12; + u32 lost; /* packets lost so far upon tx of skb */ } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; @@ -1036,6 +1057,7 @@ enum tcp_ca_event { CA_EVENT_LOSS, /* loss timeout */ CA_EVENT_ECN_NO_CE, /* ECT set, but not CE marked */ CA_EVENT_ECN_IS_CE, /* received CE marked IP packet */ + CA_EVENT_TLP_RECOVERY, /* a lost segment was repaired by TLP probe */ }; /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ @@ -1058,7 +1080,11 @@ enum tcp_ca_ack_event_flags { #define TCP_CONG_NON_RESTRICTED 0x1 /* Requires ECN/ECT set on all packets */ #define TCP_CONG_NEEDS_ECN 0x2 -#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN) +/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). 
*/ +#define TCP_CONG_WANTS_CE_EVENTS 0x4 +#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \ + TCP_CONG_NEEDS_ECN | \ + TCP_CONG_WANTS_CE_EVENTS) union tcp_cc_info; @@ -1078,10 +1104,13 @@ struct ack_sample { */ struct rate_sample { u64 prior_mstamp; /* starting timestamp for interval */ + u32 prior_lost; /* tp->lost at "prior_mstamp" */ u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */ + u32 tx_in_flight; /* packets in flight at starting timestamp */ + s32 lost; /* number of packets lost over interval */ s32 delivered; /* number of packets delivered over interval */ - s32 delivered_ce; /* number of packets delivered w/ CE marks*/ + s32 delivered_ce; /* packets delivered w/ CE mark over interval */ long interval_us; /* time for tp->delivered to incr "delivered" */ u32 snd_interval_us; /* snd interval for delivered packets */ u32 rcv_interval_us; /* rcv interval for delivered packets */ @@ -1092,7 +1121,9 @@ struct rate_sample { u32 last_end_seq; /* end_seq of most recently ACKed packet */ bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ + bool is_acking_tlp_retrans_seq; /* ACKed a TLP retransmit sequence? */ bool is_ack_delayed; /* is this (likely) a delayed ACK? */ + bool is_ece; /* did this ACK have ECN marked? */ }; struct tcp_congestion_ops { @@ -1116,8 +1147,11 @@ struct tcp_congestion_ops { /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); - /* override sysctl_tcp_min_tso_segs */ - u32 (*min_tso_segs)(struct sock *sk); + /* pick target number of segments per TSO/GSO skb (optional): */ + u32 (*tso_segs)(struct sock *sk, unsigned int mss_now); + + /* react to a specific lost skb (optional) */ + void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb); /* call when packets are delivered to update cwnd and pacing rate, * after all the ca_state processing. (optional) @@ -1183,6 +1217,14 @@ static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer) } #endif +static inline bool tcp_ca_wants_ce_events(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN | + TCP_CONG_WANTS_CE_EVENTS); +} + static inline bool tcp_ca_needs_ecn(const struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1202,6 +1244,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) void tcp_set_ca_state(struct sock *sk, const u8 ca_state); /* From tcp_rate.c */ +void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); @@ -1214,6 +1257,21 @@ static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) return t1 > t2 || (t1 == t2 && after(seq1, seq2)); } +/* If a retransmit failed due to local qdisc congestion or other local issues, + * then we may have called tcp_set_skb_tso_segs() to increase the number of + * segments in the skb without increasing the tx.in_flight. In all other cases, + * the tx.in_flight should be at least as big as the pcount of the sk_buff. We + * do not have the state to know whether a retransmit failed due to local qdisc + * congestion or other local issues, so to avoid spurious warnings we consider + * that any skb marked lost may have suffered that fate. 
+ */ +static inline bool tcp_skb_tx_in_flight_is_suspicious(u32 skb_pcount, + u32 skb_sacked_flags, + u32 tx_in_flight) +{ + return (skb_pcount > tx_in_flight) && !(skb_sacked_flags & TCPCB_LOST); +} + /* These functions determine how the current flow behaves in respect of SACK * handling. SACK is negotiated with the peer, and therefore it can vary * between different flows. @@ -2373,7 +2431,7 @@ struct tcp_plb_state { u8 consec_cong_rounds:5, /* consecutive congested rounds */ unused:3; u32 pause_until; /* jiffies32 when PLB can resume rerouting */ -}; +} __attribute__ ((__packed__)); static inline void tcp_plb_init(const struct sock *sk, struct tcp_plb_state *plb) diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 50655de04c9b..82f8bd8f0d16 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -229,6 +229,29 @@ struct tcp_bbr_info { __u32 bbr_min_rtt; /* min-filtered RTT in uSec */ __u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */ __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */ + __u32 bbr_bw_hi_lsb; /* lower 32 bits of bw_hi */ + __u32 bbr_bw_hi_msb; /* upper 32 bits of bw_hi */ + __u32 bbr_bw_lo_lsb; /* lower 32 bits of bw_lo */ + __u32 bbr_bw_lo_msb; /* upper 32 bits of bw_lo */ + __u8 bbr_mode; /* current bbr_mode in state machine */ + __u8 bbr_phase; /* current state machine phase */ + __u8 unused1; /* alignment padding; not used yet */ + __u8 bbr_version; /* BBR algorithm version */ + __u32 bbr_inflight_lo; /* lower short-term data volume bound */ + __u32 bbr_inflight_hi; /* higher long-term data volume bound */ + __u32 bbr_extra_acked; /* max excess packets ACKed in epoch */ +}; + +/* TCP BBR congestion control bbr_phase as reported in netlink/ss stats. */ +enum tcp_bbr_phase { + BBR_PHASE_INVALID = 0, + BBR_PHASE_STARTUP = 1, + BBR_PHASE_DRAIN = 2, + BBR_PHASE_PROBE_RTT = 3, + BBR_PHASE_PROBE_BW_UP = 4, + BBR_PHASE_PROBE_BW_DOWN = 5, + BBR_PHASE_PROBE_BW_CRUISE = 6, + BBR_PHASE_PROBE_BW_REFILL = 7, }; union tcp_cc_info { diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 3b687d20c9ed..a7c30c243b54 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -507,12 +507,14 @@ enum { #define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */ #define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */ #define RTAX_FEATURE_TCP_USEC_TS (1 << 4) +#define RTAX_FEATURE_ECN_LOW (1 << 5) #define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \ RTAX_FEATURE_SACK | \ RTAX_FEATURE_TIMESTAMP | \ RTAX_FEATURE_ALLFRAG | \ - RTAX_FEATURE_TCP_USEC_TS) + RTAX_FEATURE_TCP_USEC_TS | \ + RTAX_FEATURE_ECN_LOW) struct rta_session { __u8 proto; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index c07e9f90c084..5c88336ced60 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -176,6 +176,7 @@ enum tcp_fastopen_client_fail { #define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */ #define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ #define TCPI_OPT_USEC_TS 64 /* usec timestamps */ +#define TCPI_OPT_ECN_LOW 128 /* Low-latency ECN configured at init */ /* * Sender's congestion state indicating normal or abnormal situations diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8e94ed7c56a0..50dc9970cad2 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -668,15 +668,18 @@ config TCP_CONG_BBR default n help - BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to - maximize network utilization and 
minimize queues. It builds an explicit - model of the bottleneck delivery rate and path round-trip propagation - delay. It tolerates packet loss and delay unrelated to congestion. It - can operate over LAN, WAN, cellular, wifi, or cable modem links. It can - coexist with flows that use loss-based congestion control, and can - operate with shallow buffers, deep buffers, bufferbloat, policers, or - AQM schemes that do not provide a delay signal. It requires the fq - ("Fair Queue") pacing packet scheduler. + BBR (Bottleneck Bandwidth and RTT) TCP congestion control is a + model-based congestion control algorithm that aims to maximize + network utilization, keep queues and retransmit rates low, and to be + able to coexist with Reno/CUBIC in common scenarios. It builds an + explicit model of the network path. It tolerates a targeted degree + of random packet loss and delay. It can operate over LAN, WAN, + cellular, wifi, or cable modem links, and can use shallow-threshold + ECN signals. It can coexist to some degree with flows that use + loss-based congestion control, and can operate with shallow buffers, + deep buffers, bufferbloat, policers, or AQM schemes that do not + provide a delay signal. It requires pacing, using either TCP internal + pacing or the fq ("Fair Queue") pacing packet scheduler. choice prompt "Default TCP congestion control" diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index ae8b15e6896f..beb040e80b6f 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -296,11 +296,15 @@ static void bpf_tcp_ca_pkts_acked(struct sock *sk, const struct ack_sample *samp { } -static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk) +static u32 bpf_tcp_ca_tso_segs(struct sock *sk, unsigned int mss_now) { return 0; } +static void bpf_tcp_ca_skb_marked_lost(struct sock *sk, const struct sk_buff *skb) +{ +} + static void bpf_tcp_ca_cong_control(struct sock *sk, const struct rate_sample *rs) { } @@ -330,7 +334,8 @@ static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = { .cwnd_event = bpf_tcp_ca_cwnd_event, .in_ack_event = bpf_tcp_ca_in_ack_event, .pkts_acked = bpf_tcp_ca_pkts_acked, - .min_tso_segs = bpf_tcp_ca_min_tso_segs, + .tso_segs = bpf_tcp_ca_tso_segs, + .skb_marked_lost = bpf_tcp_ca_skb_marked_lost, .cong_control = bpf_tcp_ca_cong_control, .undo_cwnd = bpf_tcp_ca_undo_cwnd, .sndbuf_expand = bpf_tcp_ca_sndbuf_expand, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5887eac87bd2..4419290c0de8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3091,6 +3091,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->rx_opt.dsack = 0; tp->rx_opt.num_sacks = 0; tp->rcv_ooopack = 0; + tp->fast_ack_mode = 0; /* Clean up fastopen related fields */ @@ -3817,6 +3818,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_options |= TCPI_OPT_ECN; if (tp->ecn_flags & TCP_ECN_SEEN) info->tcpi_options |= TCPI_OPT_ECN_SEEN; + if (tp->ecn_flags & TCP_ECN_LOW) + info->tcpi_options |= TCPI_OPT_ECN_LOW; if (tp->syn_data_acked) info->tcpi_options |= TCPI_OPT_SYN_DATA; if (tp->tcp_usec_ts) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 22358032dd48..cd6bef71bf4c 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1,18 +1,19 @@ -/* Bottleneck Bandwidth and RTT (BBR) congestion control +/* BBR (Bottleneck Bandwidth and RTT) congestion control * - * BBR congestion control computes the sending rate based on the delivery - * rate (throughput) estimated from ACKs. 
In a nutshell: + * BBR is a model-based congestion control algorithm that aims for low queues, + * low loss, and (bounded) Reno/CUBIC coexistence. To maintain a model of the + * network path, it uses measurements of bandwidth and RTT, as well as (if they + * occur) packet loss and/or shallow-threshold ECN signals. Note that although + * it can use ECN or loss signals explicitly, it does not require either; it + * can bound its in-flight data based on its estimate of the BDP. * - * On each ACK, update our model of the network path: - * bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips) - * min_rtt = windowed_min(rtt, 10 seconds) - * pacing_rate = pacing_gain * bottleneck_bandwidth - * cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4) - * - * The core algorithm does not react directly to packet losses or delays, - * although BBR may adjust the size of next send per ACK when loss is - * observed, or adjust the sending rate if it estimates there is a - * traffic policer, in order to keep the drop rate reasonable. + * The model has both higher and lower bounds for the operating range: + * lo: bw_lo, inflight_lo: conservative short-term lower bound + * hi: bw_hi, inflight_hi: robust long-term upper bound + * The bandwidth-probing time scale is (a) extended dynamically based on + * estimated BDP to improve coexistence with Reno/CUBIC; (b) bounded by + * an interactive wall-clock time-scale to be more scalable and responsive + * than Reno and CUBIC. * * Here is a state transition diagram for BBR: * @@ -65,6 +66,13 @@ #include #include +#include +#include "tcp_dctcp.h" + +#define BBR_VERSION 3 + +#define bbr_param(sk,name) (bbr_ ## name) + /* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps. * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32. @@ -85,36 +93,41 @@ enum bbr_mode { BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */ }; +/* How does the incoming ACK stream relate to our bandwidth probing? */ +enum bbr_ack_phase { + BBR_ACKS_INIT, /* not probing; not getting probe feedback */ + BBR_ACKS_REFILLING, /* sending at est. bw to fill pipe */ + BBR_ACKS_PROBE_STARTING, /* inflight rising to probe bw */ + BBR_ACKS_PROBE_FEEDBACK, /* getting feedback from bw probing */ + BBR_ACKS_PROBE_STOPPING, /* stopped probing; still getting feedback */ +}; + /* BBR congestion control block */ struct bbr { u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */ u32 min_rtt_stamp; /* timestamp of min_rtt_us */ u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */ - struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */ - u32 rtt_cnt; /* count of packet-timed rounds elapsed */ + u32 probe_rtt_min_us; /* min RTT in probe_rtt_win_ms win */ + u32 probe_rtt_min_stamp; /* timestamp of probe_rtt_min_us*/ u32 next_rtt_delivered; /* scb->tx.delivered at end of round */ u64 cycle_mstamp; /* time of this cycle phase start */ - u32 mode:3, /* current bbr_mode in state machine */ + u32 mode:2, /* current bbr_mode in state machine */ prev_ca_state:3, /* CA state on previous ACK */ - packet_conservation:1, /* use packet conservation? */ round_start:1, /* start of packet-timed tx->ack round? */ + ce_state:1, /* If most recent data has CE bit set */ + bw_probe_up_rounds:5, /* cwnd-limited rounds in PROBE_UP */ + try_fast_path:1, /* can we take fast path? */ idle_restart:1, /* restarting after idle? 
*/ probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ - unused:13, - lt_is_sampling:1, /* taking long-term ("LT") samples now? */ - lt_rtt_cnt:7, /* round trips in long-term interval */ - lt_use_bw:1; /* use lt_bw as our bw estimate? */ - u32 lt_bw; /* LT est delivery rate in pkts/uS << 24 */ - u32 lt_last_delivered; /* LT intvl start: tp->delivered */ - u32 lt_last_stamp; /* LT intvl start: tp->delivered_mstamp */ - u32 lt_last_lost; /* LT intvl start: tp->lost */ + init_cwnd:7, /* initial cwnd */ + unused_1:10; u32 pacing_gain:10, /* current gain for setting pacing rate */ cwnd_gain:10, /* current gain for setting cwnd */ full_bw_reached:1, /* reached full bw in Startup? */ full_bw_cnt:2, /* number of rounds without large bw gains */ - cycle_idx:3, /* current index in pacing_gain cycle array */ + cycle_idx:2, /* current index in pacing_gain cycle array */ has_seen_rtt:1, /* have we seen an RTT sample yet? */ - unused_b:5; + unused_2:6; u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ u32 full_bw; /* recent bw, to estimate if pipe is full */ @@ -124,19 +137,67 @@ struct bbr { u32 ack_epoch_acked:20, /* packets (S)ACKed in sampling epoch */ extra_acked_win_rtts:5, /* age of extra_acked, in round trips */ extra_acked_win_idx:1, /* current index in extra_acked array */ - unused_c:6; + /* BBR v3 state: */ + full_bw_now:1, /* recently reached full bw plateau? */ + startup_ecn_rounds:2, /* consecutive hi ECN STARTUP rounds */ + loss_in_cycle:1, /* packet loss in this cycle? */ + ecn_in_cycle:1, /* ECN in this cycle? */ + unused_3:1; + u32 loss_round_delivered; /* scb->tx.delivered ending loss round */ + u32 undo_bw_lo; /* bw_lo before latest losses */ + u32 undo_inflight_lo; /* inflight_lo before latest losses */ + u32 undo_inflight_hi; /* inflight_hi before latest losses */ + u32 bw_latest; /* max delivered bw in last round trip */ + u32 bw_lo; /* lower bound on sending bandwidth */ + u32 bw_hi[2]; /* max recent measured bw sample */ + u32 inflight_latest; /* max delivered data in last round trip */ + u32 inflight_lo; /* lower bound of inflight data range */ + u32 inflight_hi; /* upper bound of inflight data range */ + u32 bw_probe_up_cnt; /* packets delivered per inflight_hi incr */ + u32 bw_probe_up_acks; /* packets (S)ACKed since inflight_hi incr */ + u32 probe_wait_us; /* PROBE_DOWN until next clock-driven probe */ + u32 prior_rcv_nxt; /* tp->rcv_nxt when CE state last changed */ + u32 ecn_eligible:1, /* sender can use ECN (RTT, handshake)? */ + ecn_alpha:9, /* EWMA delivered_ce/delivered; 0..256 */ + bw_probe_samples:1, /* rate samples reflect bw probing? */ + prev_probe_too_high:1, /* did last PROBE_UP go too high? */ + stopped_risky_probe:1, /* last PROBE_UP stopped due to risk? */ + rounds_since_probe:8, /* packet-timed rounds since probed bw */ + loss_round_start:1, /* loss_round_delivered round trip? */ + loss_in_round:1, /* loss marked in this round trip? */ + ecn_in_round:1, /* ECN marked in this round trip? */ + ack_phase:3, /* bbr_ack_phase: meaning of ACKs */ + loss_events_in_round:4,/* losses in STARTUP round */ + initialized:1; /* has bbr_init() been called? 
*/ + u32 alpha_last_delivered; /* tp->delivered at alpha update */ + u32 alpha_last_delivered_ce; /* tp->delivered_ce at alpha update */ + + u8 unused_4; /* to preserve alignment */ + struct tcp_plb_state plb; }; -#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */ +struct bbr_context { + u32 sample_bw; +}; -/* Window length of bw filter (in rounds): */ -static const int bbr_bw_rtts = CYCLE_LEN + 2; /* Window length of min_rtt filter (in sec): */ static const u32 bbr_min_rtt_win_sec = 10; /* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */ static const u32 bbr_probe_rtt_mode_ms = 200; -/* Skip TSO below the following bandwidth (bits/sec): */ -static const int bbr_min_tso_rate = 1200000; +/* Window length of probe_rtt_min_us filter (in ms), and consequently the + * typical interval between PROBE_RTT mode entries. The default is 5000ms. + * Note that bbr_probe_rtt_win_ms must be <= bbr_min_rtt_win_sec * MSEC_PER_SEC + */ +static const u32 bbr_probe_rtt_win_ms = 5000; +/* Proportion of cwnd to estimated BDP in PROBE_RTT, in units of BBR_UNIT: */ +static const u32 bbr_probe_rtt_cwnd_gain = BBR_UNIT * 1 / 2; + +/* Use min_rtt to help adapt TSO burst size, with smaller min_rtt resulting + * in bigger TSO bursts. We cut the RTT-based allowance in half + * for every 2^9 usec (aka 512 us) of RTT, so that the RTT-based allowance + * is below 1500 bytes after 6 * ~500 usec = 3ms. + */ +static const u32 bbr_tso_rtt_shift = 9; /* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck. * In order to help drive the network toward lower queues and low latency while @@ -146,13 +207,15 @@ static const int bbr_min_tso_rate = 1200000; */ static const int bbr_pacing_margin_percent = 1; -/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain +/* We use a startup_pacing_gain of 4*ln(2) because it's the smallest value * that will allow a smoothly increasing pacing rate that will double each RTT * and send the same number of packets per RTT that an un-paced, slow-starting * Reno or CUBIC flow would: */ -static const int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1; -/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain +static const int bbr_startup_pacing_gain = BBR_UNIT * 277 / 100 + 1; +/* The gain for deriving startup cwnd: */ +static const int bbr_startup_cwnd_gain = BBR_UNIT * 2; +/* The pacing gain in BBR_DRAIN is calculated to typically drain * the queue created in BBR_STARTUP in a single round: */ static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885; @@ -160,13 +223,17 @@ static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885; static const int bbr_cwnd_gain = BBR_UNIT * 2; /* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */ static const int bbr_pacing_gain[] = { - BBR_UNIT * 5 / 4, /* probe for more available bw */ - BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */ - BBR_UNIT, BBR_UNIT, BBR_UNIT, /* cruise at 1.0*bw to utilize pipe, */ - BBR_UNIT, BBR_UNIT, BBR_UNIT /* without creating excess queue... 
*/ + BBR_UNIT * 5 / 4, /* UP: probe for more available bw */ + BBR_UNIT * 91 / 100, /* DOWN: drain queue and/or yield bw */ + BBR_UNIT, /* CRUISE: try to use pipe w/ some headroom */ + BBR_UNIT, /* REFILL: refill pipe to estimated 100% */ +}; +enum bbr_pacing_gain_phase { + BBR_BW_PROBE_UP = 0, /* push up inflight to probe for bw/vol */ + BBR_BW_PROBE_DOWN = 1, /* drain excess inflight from the queue */ + BBR_BW_PROBE_CRUISE = 2, /* use pipe, w/ headroom in queue/pipe */ + BBR_BW_PROBE_REFILL = 3, /* v2: refill the pipe again to 100% */ }; -/* Randomize the starting gain cycling phase over N phases: */ -static const u32 bbr_cycle_rand = 7; /* Try to keep at least this many packets in flight, if things go smoothly. For * smooth functioning, a sliding window protocol ACKing every other packet @@ -174,24 +241,12 @@ static const u32 bbr_cycle_rand = 7; */ static const u32 bbr_cwnd_min_target = 4; -/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */ +/* To estimate if BBR_STARTUP or BBR_BW_PROBE_UP has filled pipe... */ /* If bw has increased significantly (1.25x), there may be more bw available: */ static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4; /* But after 3 rounds w/o significant bw growth, estimate pipe is full: */ static const u32 bbr_full_bw_cnt = 3; -/* "long-term" ("LT") bandwidth estimator parameters... */ -/* The minimum number of rounds in an LT bw sampling interval: */ -static const u32 bbr_lt_intvl_min_rtts = 4; -/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */ -static const u32 bbr_lt_loss_thresh = 50; -/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */ -static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8; -/* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */ -static const u32 bbr_lt_bw_diff = 4000 / 8; -/* If we estimate we're policed, use lt_bw for this many round trips: */ -static const u32 bbr_lt_bw_max_rtts = 48; - /* Gain factor for adding extra_acked to target cwnd: */ static const int bbr_extra_acked_gain = BBR_UNIT; /* Window length of extra_acked window. */ @@ -201,8 +256,121 @@ static const u32 bbr_ack_epoch_acked_reset_thresh = 1U << 20; /* Time period for clamping cwnd increment due to ack aggregation */ static const u32 bbr_extra_acked_max_us = 100 * 1000; +/* Flags to control BBR ECN-related behavior... */ + +/* Ensure ACKs only ACK packets with consistent ECN CE status? */ +static const bool bbr_precise_ece_ack = true; + +/* Max RTT (in usec) at which to use sender-side ECN logic. + * Disabled when 0 (ECN allowed at any RTT). + */ +static const u32 bbr_ecn_max_rtt_us = 5000; + +/* On losses, scale down inflight and pacing rate by beta scaled by BBR_SCALE. + * No loss response when 0. + */ +static const u32 bbr_beta = BBR_UNIT * 30 / 100; + +/* Gain factor for ECN mark ratio samples, scaled by BBR_SCALE (1/16 = 6.25%) */ +static const u32 bbr_ecn_alpha_gain = BBR_UNIT * 1 / 16; + +/* The initial value for ecn_alpha; 1.0 allows a flow to respond quickly + * to congestion if the bottleneck is congested when the flow starts up. + */ +static const u32 bbr_ecn_alpha_init = BBR_UNIT; + +/* On ECN, cut inflight_lo to (1 - ecn_factor * ecn_alpha) scaled by BBR_SCALE. + * No ECN based bounding when 0. + */ +static const u32 bbr_ecn_factor = BBR_UNIT * 1 / 3; /* 1/3 = 33% */ + +/* Estimate bw probing has gone too far if CE ratio exceeds this threshold. + * Scaled by BBR_SCALE. Disabled when 0. 
+ */ +static const u32 bbr_ecn_thresh = BBR_UNIT * 1 / 2; /* 1/2 = 50% */ + +/* If non-zero, if in a cycle with no losses but some ECN marks, after ECN + * clears then make the first round's increment to inflight_hi the following + * fraction of inflight_hi. + */ +static const u32 bbr_ecn_reprobe_gain = BBR_UNIT * 1 / 2; + +/* Estimate bw probing has gone too far if loss rate exceeds this level. */ +static const u32 bbr_loss_thresh = BBR_UNIT * 2 / 100; /* 2% loss */ + +/* Slow down for a packet loss recovered by TLP? */ +static const bool bbr_loss_probe_recovery = true; + +/* Exit STARTUP if number of loss marking events in a Recovery round is >= N, + * and loss rate is higher than bbr_loss_thresh. + * Disabled if 0. + */ +static const u32 bbr_full_loss_cnt = 6; + +/* Exit STARTUP if number of round trips with ECN mark rate above ecn_thresh + * meets this count. + */ +static const u32 bbr_full_ecn_cnt = 2; + +/* Fraction of unutilized headroom to try to leave in path upon high loss. */ +static const u32 bbr_inflight_headroom = BBR_UNIT * 15 / 100; + +/* How much do we increase cwnd_gain when probing for bandwidth in + * BBR_BW_PROBE_UP? This specifies the increment in units of + * BBR_UNIT/4. The default is 1, meaning 0.25. + * The min value is 0 (meaning 0.0); max is 3 (meaning 0.75). + */ +static const u32 bbr_bw_probe_cwnd_gain = 1; + +/* Max number of packet-timed rounds to wait before probing for bandwidth. If + * we want to tolerate 1% random loss per round, and not have this cut our + * inflight too much, we must probe for bw periodically on roughly this scale. + * If low, limits Reno/CUBIC coexistence; if high, limits loss tolerance. + * We aim to be fair with Reno/CUBIC up to a BDP of at least: + * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets + */ +static const u32 bbr_bw_probe_max_rounds = 63; + +/* Max amount of randomness to inject in round counting for Reno-coexistence. + */ +static const u32 bbr_bw_probe_rand_rounds = 2; + +/* Use BBR-native probe time scale starting at this many usec. + * We aim to be fair with Reno/CUBIC up to an inter-loss time epoch of at least: + * BDP*RTT = 25Mbps * .030sec /(1514bytes) * 0.030sec = 1.9 secs + */ +static const u32 bbr_bw_probe_base_us = 2 * USEC_PER_SEC; /* 2 secs */ + +/* Use BBR-native probes spread over this many usec: */ +static const u32 bbr_bw_probe_rand_us = 1 * USEC_PER_SEC; /* 1 secs */ + +/* Use fast path if app-limited, no loss/ECN, and target cwnd was reached? */ +static const bool bbr_fast_path = true; + +/* Use fast ack mode? */ +static const bool bbr_fast_ack_mode = true; + +static u32 bbr_max_bw(const struct sock *sk); +static u32 bbr_bw(const struct sock *sk); +static void bbr_exit_probe_rtt(struct sock *sk); +static void bbr_reset_congestion_signals(struct sock *sk); +static void bbr_run_loss_probe_recovery(struct sock *sk); + static void bbr_check_probe_rtt_done(struct sock *sk); +/* This connection can use ECN if both endpoints have signaled ECN support in + * the handshake and the per-route settings indicated this is a + * shallow-threshold ECN environment, meaning both: + * (a) ECN CE marks indicate low-latency/shallow-threshold congestion, and + * (b) TCP endpoints provide precise ACKs that only ACK data segments + * with consistent ECN CE status + */ +static bool bbr_can_use_ecn(const struct sock *sk) +{ + return (tcp_sk(sk)->ecn_flags & TCP_ECN_OK) && + (tcp_sk(sk)->ecn_flags & TCP_ECN_LOW); +} + /* Do we estimate that STARTUP filled the pipe? 
*/ static bool bbr_full_bw_reached(const struct sock *sk) { @@ -214,17 +382,17 @@ static bool bbr_full_bw_reached(const struct sock *sk) /* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ static u32 bbr_max_bw(const struct sock *sk) { - struct bbr *bbr = inet_csk_ca(sk); + const struct bbr *bbr = inet_csk_ca(sk); - return minmax_get(&bbr->bw); + return max(bbr->bw_hi[0], bbr->bw_hi[1]); } /* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */ static u32 bbr_bw(const struct sock *sk) { - struct bbr *bbr = inet_csk_ca(sk); + const struct bbr *bbr = inet_csk_ca(sk); - return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk); + return min(bbr_max_bw(sk), bbr->bw_lo); } /* Return maximum extra acked in past k-2k round trips, @@ -241,15 +409,23 @@ static u16 bbr_extra_acked(const struct sock *sk) * The order here is chosen carefully to avoid overflow of u64. This should * work for input rates of up to 2.9Tbit/sec and gain of 2.89x. */ -static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) +static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain, + int margin) { unsigned int mss = tcp_sk(sk)->mss_cache; rate *= mss; rate *= gain; rate >>= BBR_SCALE; - rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_margin_percent); - return rate >> BW_SCALE; + rate *= USEC_PER_SEC / 100 * (100 - margin); + rate >>= BW_SCALE; + rate = max(rate, 1ULL); + return rate; +} + +static u64 bbr_bw_bytes_per_sec(struct sock *sk, u64 rate) +{ + return bbr_rate_bytes_per_sec(sk, rate, BBR_UNIT, 0); } /* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */ @@ -257,12 +433,13 @@ static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) { u64 rate = bw; - rate = bbr_rate_bytes_per_sec(sk, rate, gain); + rate = bbr_rate_bytes_per_sec(sk, rate, gain, + bbr_pacing_margin_percent); rate = min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate)); return rate; } -/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ +/* Initialize pacing rate to: startup_pacing_gain * init_cwnd / RTT. */ static void bbr_init_pacing_rate_from_rtt(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -279,7 +456,7 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk) bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT; do_div(bw, rtt_us); WRITE_ONCE(sk->sk_pacing_rate, - bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain)); + bbr_bw_to_pacing_rate(sk, bw, bbr_param(sk, startup_pacing_gain))); } /* Pace using current bw estimate and a gain factor. */ @@ -295,26 +472,48 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) WRITE_ONCE(sk->sk_pacing_rate, rate); } -/* override sysctl_tcp_min_tso_segs */ -__bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk) +/* Return the number of segments BBR would like in a TSO/GSO skb, given a + * particular max gso size as a constraint. TODO: make this simpler and more + * consistent by switching bbr to just call tcp_tso_autosize(). + */ +static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now, + u32 gso_max_size) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 segs, r; + u64 bytes; + + /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */ + bytes = READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift); + + /* Budget a TSO/GSO burst size allowance based on min_rtt. For every + * K = 2^tso_rtt_shift microseconds of min_rtt, halve the burst. 
+ * The min_rtt-based burst allowance is: 64 KBytes / 2^(min_rtt/K) + */ + if (bbr_param(sk, tso_rtt_shift)) { + r = bbr->min_rtt_us >> bbr_param(sk, tso_rtt_shift); + if (r < BITS_PER_TYPE(u32)) /* prevent undefined behavior */ + bytes += GSO_LEGACY_MAX_SIZE >> r; + } + + bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER); + segs = max_t(u32, bytes / mss_now, + sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); + return segs; +} + +/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */ +__bpf_kfunc static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now) { - return READ_ONCE(sk->sk_pacing_rate) < (bbr_min_tso_rate >> 3) ? 1 : 2; + return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size); } +/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */ static u32 bbr_tso_segs_goal(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - u32 segs, bytes; - - /* Sort of tcp_tso_autosize() but ignoring - * driver provided sk_gso_max_size. - */ - bytes = min_t(unsigned long, - READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift), - GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER); - segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); - return min(segs, 0x7FU); + return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_LEGACY_MAX_SIZE); } /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ @@ -334,7 +533,9 @@ __bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - if (event == CA_EVENT_TX_START && tp->app_limited) { + if (event == CA_EVENT_TX_START) { + if (!tp->app_limited) + return; bbr->idle_restart = 1; bbr->ack_epoch_mstamp = tp->tcp_mstamp; bbr->ack_epoch_acked = 0; @@ -345,6 +546,16 @@ __bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); else if (bbr->mode == BBR_PROBE_RTT) bbr_check_probe_rtt_done(sk); + } else if ((event == CA_EVENT_ECN_IS_CE || + event == CA_EVENT_ECN_NO_CE) && + bbr_can_use_ecn(sk) && + bbr_param(sk, precise_ece_ack)) { + u32 state = bbr->ce_state; + dctcp_ece_ack_update(sk, event, &bbr->prior_rcv_nxt, &state); + bbr->ce_state = state; + } else if (event == CA_EVENT_TLP_RECOVERY && + bbr_param(sk, loss_probe_recovery)) { + bbr_run_loss_probe_recovery(sk); } } @@ -367,10 +578,10 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) * default. This should only happen when the connection is not using TCP * timestamps and has retransmitted all of the SYN/SYNACK/data packets * ACKed so far. In this case, an RTO can cut cwnd to 1, in which - * case we need to slow-start up toward something safe: TCP_INIT_CWND. + * case we need to slow-start up toward something safe: initial cwnd. */ if (unlikely(bbr->min_rtt_us == ~0U)) /* no valid RTT samples yet? */ - return TCP_INIT_CWND; /* be safe: cap at default initial cwnd*/ + return bbr->init_cwnd; /* be safe: cap at initial cwnd */ w = (u64)bw * bbr->min_rtt_us; @@ -387,23 +598,23 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) * - one skb in sending host Qdisc, * - one skb in sending host TSO/GSO engine * - one skb being received by receiver host LRO/GRO/delayed-ACK engine - * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because - * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, + * Don't worry, at low rates this won't bloat cwnd because + * in such cases tso_segs_goal is small. 
The minimum cwnd is 4 packets, * which allows 2 outstanding 2-packet sequences, to try to keep pipe * full even with ACK-every-other-packet delayed ACKs. */ static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd) { struct bbr *bbr = inet_csk_ca(sk); + u32 tso_segs_goal; - /* Allow enough full-sized skbs in flight to utilize end systems. */ - cwnd += 3 * bbr_tso_segs_goal(sk); - - /* Reduce delayed ACKs by rounding up cwnd to the next even number. */ - cwnd = (cwnd + 1) & ~1U; + tso_segs_goal = 3 * bbr_tso_segs_goal(sk); + /* Allow enough full-sized skbs in flight to utilize end systems. */ + cwnd = max_t(u32, cwnd, tso_segs_goal); + cwnd = max_t(u32, cwnd, bbr_param(sk, cwnd_min_target)); /* Ensure gain cycling gets inflight above BDP even for small BDPs. */ - if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == 0) + if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) cwnd += 2; return cwnd; @@ -458,10 +669,10 @@ static u32 bbr_ack_aggregation_cwnd(struct sock *sk) { u32 max_aggr_cwnd, aggr_cwnd = 0; - if (bbr_extra_acked_gain && bbr_full_bw_reached(sk)) { + if (bbr_param(sk, extra_acked_gain)) { max_aggr_cwnd = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us) / BW_UNIT; - aggr_cwnd = (bbr_extra_acked_gain * bbr_extra_acked(sk)) + aggr_cwnd = (bbr_param(sk, extra_acked_gain) * bbr_extra_acked(sk)) >> BBR_SCALE; aggr_cwnd = min(aggr_cwnd, max_aggr_cwnd); } @@ -469,66 +680,27 @@ static u32 bbr_ack_aggregation_cwnd(struct sock *sk) return aggr_cwnd; } -/* An optimization in BBR to reduce losses: On the first round of recovery, we - * follow the packet conservation principle: send P packets per P packets acked. - * After that, we slow-start and send at most 2*P packets per P packets acked. - * After recovery finishes, or upon undo, we restore the cwnd we had when - * recovery started (capped by the target cwnd based on estimated BDP). - * - * TODO(ycheng/ncardwell): implement a rate-based approach. - */ -static bool bbr_set_cwnd_to_recover_or_restore( - struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd) +/* Returns the cwnd for PROBE_RTT mode. */ +static u32 bbr_probe_rtt_cwnd(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state; - u32 cwnd = tcp_snd_cwnd(tp); - - /* An ACK for P pkts should release at most 2*P packets. We do this - * in two steps. First, here we deduct the number of lost packets. - * Then, in bbr_set_cwnd() we slow start up toward the target cwnd. - */ - if (rs->losses > 0) - cwnd = max_t(s32, cwnd - rs->losses, 1); - - if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) { - /* Starting 1st round of Recovery, so do packet conservation. */ - bbr->packet_conservation = 1; - bbr->next_rtt_delivered = tp->delivered; /* start round now */ - /* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */ - cwnd = tcp_packets_in_flight(tp) + acked; - } else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) { - /* Exiting loss recovery; restore cwnd saved before recovery. 
*/ - cwnd = max(cwnd, bbr->prior_cwnd); - bbr->packet_conservation = 0; - } - bbr->prev_ca_state = state; - - if (bbr->packet_conservation) { - *new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked); - return true; /* yes, using packet conservation */ - } - *new_cwnd = cwnd; - return false; + return max_t(u32, bbr_param(sk, cwnd_min_target), + bbr_bdp(sk, bbr_bw(sk), bbr_param(sk, probe_rtt_cwnd_gain))); } /* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss * has drawn us down below target), or snap down to target if we're above it. */ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, - u32 acked, u32 bw, int gain) + u32 acked, u32 bw, int gain, u32 cwnd, + struct bbr_context *ctx) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - u32 cwnd = tcp_snd_cwnd(tp), target_cwnd = 0; + u32 target_cwnd = 0; if (!acked) goto done; /* no packet fully ACKed; just apply caps */ - if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd)) - goto done; - target_cwnd = bbr_bdp(sk, bw, gain); /* Increment the cwnd to account for excess ACKed data that seems @@ -537,74 +709,26 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, target_cwnd += bbr_ack_aggregation_cwnd(sk); target_cwnd = bbr_quantization_budget(sk, target_cwnd); - /* If we're below target cwnd, slow start cwnd toward target cwnd. */ - if (bbr_full_bw_reached(sk)) /* only cut cwnd if we filled the pipe */ - cwnd = min(cwnd + acked, target_cwnd); - else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND) - cwnd = cwnd + acked; - cwnd = max(cwnd, bbr_cwnd_min_target); + /* Update cwnd and enable fast path if cwnd reaches target_cwnd. */ + bbr->try_fast_path = 0; + if (bbr_full_bw_reached(sk)) { /* only cut cwnd if we filled the pipe */ + cwnd += acked; + if (cwnd >= target_cwnd) { + cwnd = target_cwnd; + bbr->try_fast_path = 1; + } + } else if (cwnd < target_cwnd || cwnd < 2 * bbr->init_cwnd) { + cwnd += acked; + } else { + bbr->try_fast_path = 1; + } + cwnd = max_t(u32, cwnd, bbr_param(sk, cwnd_min_target)); done: - tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* apply global cap */ + tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* global cap */ if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */ - tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target)); -} - -/* End cycle phase if it's time and/or we hit the phase's in-flight target. */ -static bool bbr_is_next_cycle_phase(struct sock *sk, - const struct rate_sample *rs) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - bool is_full_length = - tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) > - bbr->min_rtt_us; - u32 inflight, bw; - - /* The pacing_gain of 1.0 paces at the estimated bw to try to fully - * use the pipe without increasing the queue. - */ - if (bbr->pacing_gain == BBR_UNIT) - return is_full_length; /* just use wall clock time */ - - inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight); - bw = bbr_max_bw(sk); - - /* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at - * least pacing_gain*BDP; this may take more than min_rtt if min_rtt is - * small (e.g. on a LAN). We do not persist if packets are lost, since - * a path with small buffers may not hold that much. 
- */ - if (bbr->pacing_gain > BBR_UNIT) - return is_full_length && - (rs->losses || /* perhaps pacing_gain*BDP won't fit */ - inflight >= bbr_inflight(sk, bw, bbr->pacing_gain)); - - /* A pacing_gain < 1.0 tries to drain extra queue we added if bw - * probing didn't find more bw. If inflight falls to match BDP then we - * estimate queue is drained; persisting would underutilize the pipe. - */ - return is_full_length || - inflight <= bbr_inflight(sk, bw, BBR_UNIT); -} - -static void bbr_advance_cycle_phase(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - - bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); - bbr->cycle_mstamp = tp->delivered_mstamp; -} - -/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ -static void bbr_update_cycle_phase(struct sock *sk, - const struct rate_sample *rs) -{ - struct bbr *bbr = inet_csk_ca(sk); - - if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs)) - bbr_advance_cycle_phase(sk); + tcp_snd_cwnd_set(tp, min_t(u32, tcp_snd_cwnd(tp), + bbr_probe_rtt_cwnd(sk))); } static void bbr_reset_startup_mode(struct sock *sk) @@ -614,191 +738,49 @@ static void bbr_reset_startup_mode(struct sock *sk) bbr->mode = BBR_STARTUP; } -static void bbr_reset_probe_bw_mode(struct sock *sk) -{ - struct bbr *bbr = inet_csk_ca(sk); - - bbr->mode = BBR_PROBE_BW; - bbr->cycle_idx = CYCLE_LEN - 1 - get_random_u32_below(bbr_cycle_rand); - bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */ -} - -static void bbr_reset_mode(struct sock *sk) -{ - if (!bbr_full_bw_reached(sk)) - bbr_reset_startup_mode(sk); - else - bbr_reset_probe_bw_mode(sk); -} - -/* Start a new long-term sampling interval. */ -static void bbr_reset_lt_bw_sampling_interval(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - - bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC); - bbr->lt_last_delivered = tp->delivered; - bbr->lt_last_lost = tp->lost; - bbr->lt_rtt_cnt = 0; -} - -/* Completely reset long-term bandwidth sampling. */ -static void bbr_reset_lt_bw_sampling(struct sock *sk) -{ - struct bbr *bbr = inet_csk_ca(sk); - - bbr->lt_bw = 0; - bbr->lt_use_bw = 0; - bbr->lt_is_sampling = false; - bbr_reset_lt_bw_sampling_interval(sk); -} - -/* Long-term bw sampling interval is done. Estimate whether we're policed. */ -static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw) -{ - struct bbr *bbr = inet_csk_ca(sk); - u32 diff; - - if (bbr->lt_bw) { /* do we have bw from a previous interval? */ - /* Is new bw close to the lt_bw from the previous interval? */ - diff = abs(bw - bbr->lt_bw); - if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) || - (bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <= - bbr_lt_bw_diff)) { - /* All criteria are met; estimate we're policed. */ - bbr->lt_bw = (bw + bbr->lt_bw) >> 1; /* avg 2 intvls */ - bbr->lt_use_bw = 1; - bbr->pacing_gain = BBR_UNIT; /* try to avoid drops */ - bbr->lt_rtt_cnt = 0; - return; - } - } - bbr->lt_bw = bw; - bbr_reset_lt_bw_sampling_interval(sk); -} - -/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of - * Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and - * explicitly models their policed rate, to reduce unnecessary losses. We - * estimate that we're policed if we see 2 consecutive sampling intervals with - * consistent throughput and high packet loss. 
If we think we're being policed, - * set lt_bw to the "long-term" average delivery rate from those 2 intervals. +/* See if we have reached next round trip. Upon start of the new round, + * returns packets delivered since previous round start plus this ACK. */ -static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - u32 lost, delivered; - u64 bw; - u32 t; - - if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */ - if (bbr->mode == BBR_PROBE_BW && bbr->round_start && - ++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) { - bbr_reset_lt_bw_sampling(sk); /* stop using lt_bw */ - bbr_reset_probe_bw_mode(sk); /* restart gain cycling */ - } - return; - } - - /* Wait for the first loss before sampling, to let the policer exhaust - * its tokens and estimate the steady-state rate allowed by the policer. - * Starting samples earlier includes bursts that over-estimate the bw. - */ - if (!bbr->lt_is_sampling) { - if (!rs->losses) - return; - bbr_reset_lt_bw_sampling_interval(sk); - bbr->lt_is_sampling = true; - } - - /* To avoid underestimates, reset sampling if we run out of data. */ - if (rs->is_app_limited) { - bbr_reset_lt_bw_sampling(sk); - return; - } - - if (bbr->round_start) - bbr->lt_rtt_cnt++; /* count round trips in this interval */ - if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts) - return; /* sampling interval needs to be longer */ - if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) { - bbr_reset_lt_bw_sampling(sk); /* interval is too long */ - return; - } - - /* End sampling interval when a packet is lost, so we estimate the - * policer tokens were exhausted. Stopping the sampling before the - * tokens are exhausted under-estimates the policed rate. - */ - if (!rs->losses) - return; - - /* Calculate packets lost and delivered in sampling interval. */ - lost = tp->lost - bbr->lt_last_lost; - delivered = tp->delivered - bbr->lt_last_delivered; - /* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */ - if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered) - return; - - /* Find average delivery rate in this sampling interval. 
*/ - t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp; - if ((s32)t < 1) - return; /* interval is less than one ms, so wait */ - /* Check if can multiply without overflow */ - if (t >= ~0U / USEC_PER_MSEC) { - bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */ - return; - } - t *= USEC_PER_MSEC; - bw = (u64)delivered * BW_UNIT; - do_div(bw, t); - bbr_lt_bw_interval_done(sk, bw); -} - -/* Estimate the bandwidth based on how fast packets are delivered */ -static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) +static u32 bbr_update_round_start(struct sock *sk, + const struct rate_sample *rs, struct bbr_context *ctx) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - u64 bw; + u32 round_delivered = 0; bbr->round_start = 0; - if (rs->delivered < 0 || rs->interval_us <= 0) - return; /* Not a valid observation */ /* See if we've reached the next RTT */ - if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) { + if (rs->interval_us > 0 && + !before(rs->prior_delivered, bbr->next_rtt_delivered)) { + round_delivered = tp->delivered - bbr->next_rtt_delivered; bbr->next_rtt_delivered = tp->delivered; - bbr->rtt_cnt++; bbr->round_start = 1; - bbr->packet_conservation = 0; } + return round_delivered; +} - bbr_lt_bw_sampling(sk, rs); +/* Calculate the bandwidth based on how fast packets are delivered */ +static void bbr_calculate_bw_sample(struct sock *sk, + const struct rate_sample *rs, struct bbr_context *ctx) +{ + u64 bw = 0; /* Divide delivered by the interval to find a (lower bound) bottleneck * bandwidth sample. Delivered is in packets and interval_us in uS and * ratio will be <<1 for most connections. So delivered is first scaled. + * Round up to allow growth at low rates, even with integer division. */ - bw = div64_long((u64)rs->delivered * BW_UNIT, rs->interval_us); - - /* If this sample is application-limited, it is likely to have a very - * low delivered count that represents application behavior rather than - * the available network rate. Such a sample could drag down estimated - * bw, causing needless slow-down. Thus, to continue to send at the - * last measured network rate, we filter out app-limited samples unless - * they describe the path bw at least as well as our bw model. - * - * So the goal during app-limited phase is to proceed with the best - * network rate no matter how long. We automatically leave this - * phase when app writes faster than the network can deliver :) - */ - if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) { - /* Incorporate new sample into our max bw filter. */ - minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw); + if (rs->interval_us > 0) { + if (WARN_ONCE(rs->delivered < 0, + "negative delivered: %d interval_us: %ld\n", + rs->delivered, rs->interval_us)) + return; + + bw = DIV_ROUND_UP_ULL((u64)rs->delivered * BW_UNIT, rs->interval_us); } + + ctx->sample_bw = bw; } /* Estimates the windowed max degree of ack aggregation. @@ -812,7 +794,7 @@ static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) * * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms). * Max filter is an approximate sliding window of 5-10 (packet timed) round - * trips. + * trips for non-startup phase, and 1-2 round trips for startup. 
*/ static void bbr_update_ack_aggregation(struct sock *sk, const struct rate_sample *rs) @@ -820,15 +802,19 @@ static void bbr_update_ack_aggregation(struct sock *sk, u32 epoch_us, expected_acked, extra_acked; struct bbr *bbr = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); + u32 extra_acked_win_rtts_thresh = bbr_param(sk, extra_acked_win_rtts); - if (!bbr_extra_acked_gain || rs->acked_sacked <= 0 || + if (!bbr_param(sk, extra_acked_gain) || rs->acked_sacked <= 0 || rs->delivered < 0 || rs->interval_us <= 0) return; if (bbr->round_start) { bbr->extra_acked_win_rtts = min(0x1F, bbr->extra_acked_win_rtts + 1); - if (bbr->extra_acked_win_rtts >= bbr_extra_acked_win_rtts) { + if (!bbr_full_bw_reached(sk)) + extra_acked_win_rtts_thresh = 1; + if (bbr->extra_acked_win_rtts >= + extra_acked_win_rtts_thresh) { bbr->extra_acked_win_rtts = 0; bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ? 0 : 1; @@ -862,49 +848,6 @@ static void bbr_update_ack_aggregation(struct sock *sk, bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked; } -/* Estimate when the pipe is full, using the change in delivery rate: BBR - * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by - * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited - * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the - * higher rwin, 3: we get higher delivery rate samples. Or transient - * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar - * design goal, but uses delay and inter-ACK spacing instead of bandwidth. - */ -static void bbr_check_full_bw_reached(struct sock *sk, - const struct rate_sample *rs) -{ - struct bbr *bbr = inet_csk_ca(sk); - u32 bw_thresh; - - if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited) - return; - - bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE; - if (bbr_max_bw(sk) >= bw_thresh) { - bbr->full_bw = bbr_max_bw(sk); - bbr->full_bw_cnt = 0; - return; - } - ++bbr->full_bw_cnt; - bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt; -} - -/* If pipe is probably full, drain the queue and then enter steady-state. 
*/ -static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) -{ - struct bbr *bbr = inet_csk_ca(sk); - - if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { - bbr->mode = BBR_DRAIN; /* drain queue we created */ - tcp_sk(sk)->snd_ssthresh = - bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); - } /* fall through to check if in-flight is already small: */ - if (bbr->mode == BBR_DRAIN && - bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= - bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) - bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ -} - static void bbr_check_probe_rtt_done(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -914,9 +857,9 @@ static void bbr_check_probe_rtt_done(struct sock *sk) after(tcp_jiffies32, bbr->probe_rtt_done_stamp))) return; - bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */ + bbr->probe_rtt_min_stamp = tcp_jiffies32; /* schedule next PROBE_RTT */ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd)); - bbr_reset_mode(sk); + bbr_exit_probe_rtt(sk); } /* The goal of PROBE_RTT mode is to have BBR flows cooperatively and @@ -942,23 +885,35 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - bool filter_expired; + bool probe_rtt_expired, min_rtt_expired; + u32 expire; - /* Track min RTT seen in the min_rtt_win_sec filter window: */ - filter_expired = after(tcp_jiffies32, - bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); + /* Track min RTT in probe_rtt_win_ms to time next PROBE_RTT state. */ + expire = bbr->probe_rtt_min_stamp + + msecs_to_jiffies(bbr_param(sk, probe_rtt_win_ms)); + probe_rtt_expired = after(tcp_jiffies32, expire); if (rs->rtt_us >= 0 && - (rs->rtt_us < bbr->min_rtt_us || - (filter_expired && !rs->is_ack_delayed))) { - bbr->min_rtt_us = rs->rtt_us; - bbr->min_rtt_stamp = tcp_jiffies32; + (rs->rtt_us < bbr->probe_rtt_min_us || + (probe_rtt_expired && !rs->is_ack_delayed))) { + bbr->probe_rtt_min_us = rs->rtt_us; + bbr->probe_rtt_min_stamp = tcp_jiffies32; + } + /* Track min RTT seen in the min_rtt_win_sec filter window: */ + expire = bbr->min_rtt_stamp + bbr_param(sk, min_rtt_win_sec) * HZ; + min_rtt_expired = after(tcp_jiffies32, expire); + if (bbr->probe_rtt_min_us <= bbr->min_rtt_us || + min_rtt_expired) { + bbr->min_rtt_us = bbr->probe_rtt_min_us; + bbr->min_rtt_stamp = bbr->probe_rtt_min_stamp; } - if (bbr_probe_rtt_mode_ms > 0 && filter_expired && + if (bbr_param(sk, probe_rtt_mode_ms) > 0 && probe_rtt_expired && !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) { bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */ bbr_save_cwnd(sk); /* note cwnd so we can restore it */ bbr->probe_rtt_done_stamp = 0; + bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; + bbr->next_rtt_delivered = tp->delivered; } if (bbr->mode == BBR_PROBE_RTT) { @@ -967,9 +922,9 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; /* Maintain min packets in flight for max(200 ms, 1 round). 
*/ if (!bbr->probe_rtt_done_stamp && - tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) { + tcp_packets_in_flight(tp) <= bbr_probe_rtt_cwnd(sk)) { bbr->probe_rtt_done_stamp = tcp_jiffies32 + - msecs_to_jiffies(bbr_probe_rtt_mode_ms); + msecs_to_jiffies(bbr_param(sk, probe_rtt_mode_ms)); bbr->probe_rtt_round_done = 0; bbr->next_rtt_delivered = tp->delivered; } else if (bbr->probe_rtt_done_stamp) { @@ -990,18 +945,20 @@ static void bbr_update_gains(struct sock *sk) switch (bbr->mode) { case BBR_STARTUP: - bbr->pacing_gain = bbr_high_gain; - bbr->cwnd_gain = bbr_high_gain; + bbr->pacing_gain = bbr_param(sk, startup_pacing_gain); + bbr->cwnd_gain = bbr_param(sk, startup_cwnd_gain); break; case BBR_DRAIN: - bbr->pacing_gain = bbr_drain_gain; /* slow, to drain */ - bbr->cwnd_gain = bbr_high_gain; /* keep cwnd */ + bbr->pacing_gain = bbr_param(sk, drain_gain); /* slow, to drain */ + bbr->cwnd_gain = bbr_param(sk, startup_cwnd_gain); /* keep cwnd */ break; case BBR_PROBE_BW: - bbr->pacing_gain = (bbr->lt_use_bw ? - BBR_UNIT : - bbr_pacing_gain[bbr->cycle_idx]); - bbr->cwnd_gain = bbr_cwnd_gain; + bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx]; + bbr->cwnd_gain = bbr_param(sk, cwnd_gain); + if (bbr_param(sk, bw_probe_cwnd_gain) && + bbr->cycle_idx == BBR_BW_PROBE_UP) + bbr->cwnd_gain += + BBR_UNIT * bbr_param(sk, bw_probe_cwnd_gain) / 4; break; case BBR_PROBE_RTT: bbr->pacing_gain = BBR_UNIT; @@ -1013,144 +970,1387 @@ static void bbr_update_gains(struct sock *sk) } } -static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) +__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk) { - bbr_update_bw(sk, rs); - bbr_update_ack_aggregation(sk, rs); - bbr_update_cycle_phase(sk, rs); - bbr_check_full_bw_reached(sk, rs); - bbr_check_drain(sk, rs); - bbr_update_min_rtt(sk, rs); - bbr_update_gains(sk); + /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ + return 3; } -__bpf_kfunc static void bbr_main(struct sock *sk, const struct rate_sample *rs) +/* Incorporate a new bw sample into the current window of our max filter. */ +static void bbr_take_max_bw_sample(struct sock *sk, u32 bw) { struct bbr *bbr = inet_csk_ca(sk); - u32 bw; - - bbr_update_model(sk, rs); - bw = bbr_bw(sk); - bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); - bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain); + bbr->bw_hi[1] = max(bw, bbr->bw_hi[1]); } -__bpf_kfunc static void bbr_init(struct sock *sk) +/* Keep max of last 1-2 cycles. Each PROBE_BW cycle, flip filter window. */ +static void bbr_advance_max_bw_filter(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - bbr->prior_cwnd = 0; - tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; - bbr->rtt_cnt = 0; - bbr->next_rtt_delivered = tp->delivered; - bbr->prev_ca_state = TCP_CA_Open; - bbr->packet_conservation = 0; - - bbr->probe_rtt_done_stamp = 0; - bbr->probe_rtt_round_done = 0; - bbr->min_rtt_us = tcp_min_rtt(tp); - bbr->min_rtt_stamp = tcp_jiffies32; - - minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ + if (!bbr->bw_hi[1]) + return; /* no samples in this window; remember old window */ + bbr->bw_hi[0] = bbr->bw_hi[1]; + bbr->bw_hi[1] = 0; +} - bbr->has_seen_rtt = 0; - bbr_init_pacing_rate_from_rtt(sk); +/* Reset the estimator for reaching full bandwidth based on bw plateau. 
*/ +static void bbr_reset_full_bw(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); - bbr->round_start = 0; - bbr->idle_restart = 0; - bbr->full_bw_reached = 0; bbr->full_bw = 0; bbr->full_bw_cnt = 0; - bbr->cycle_mstamp = 0; - bbr->cycle_idx = 0; - bbr_reset_lt_bw_sampling(sk); - bbr_reset_startup_mode(sk); + bbr->full_bw_now = 0; +} - bbr->ack_epoch_mstamp = tp->tcp_mstamp; - bbr->ack_epoch_acked = 0; - bbr->extra_acked_win_rtts = 0; - bbr->extra_acked_win_idx = 0; - bbr->extra_acked[0] = 0; - bbr->extra_acked[1] = 0; +/* How much do we want in flight? Our BDP, unless congestion cut cwnd. */ +static u32 bbr_target_inflight(struct sock *sk) +{ + u32 bdp = bbr_inflight(sk, bbr_bw(sk), BBR_UNIT); - cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); + return min(bdp, tcp_sk(sk)->snd_cwnd); } -__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk) +static bool bbr_is_probing_bandwidth(struct sock *sk) { - /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ - return 3; + struct bbr *bbr = inet_csk_ca(sk); + + return (bbr->mode == BBR_STARTUP) || + (bbr->mode == BBR_PROBE_BW && + (bbr->cycle_idx == BBR_BW_PROBE_REFILL || + bbr->cycle_idx == BBR_BW_PROBE_UP)); +} + +/* Has the given amount of time elapsed since we marked the phase start? */ +static bool bbr_has_elapsed_in_phase(const struct sock *sk, u32 interval_us) +{ + const struct tcp_sock *tp = tcp_sk(sk); + const struct bbr *bbr = inet_csk_ca(sk); + + return tcp_stamp_us_delta(tp->tcp_mstamp, + bbr->cycle_mstamp + interval_us) > 0; +} + +static void bbr_handle_queue_too_high_in_startup(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bdp; /* estimated BDP in packets, with quantization budget */ + + bbr->full_bw_reached = 1; + + bdp = bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); + bbr->inflight_hi = max(bdp, bbr->inflight_latest); +} + +/* Exit STARTUP upon N consecutive rounds with ECN mark rate > ecn_thresh. */ +static void bbr_check_ecn_too_high_in_startup(struct sock *sk, u32 ce_ratio) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr_full_bw_reached(sk) || !bbr->ecn_eligible || + !bbr_param(sk, full_ecn_cnt) || !bbr_param(sk, ecn_thresh)) + return; + + if (ce_ratio >= bbr_param(sk, ecn_thresh)) + bbr->startup_ecn_rounds++; + else + bbr->startup_ecn_rounds = 0; + + if (bbr->startup_ecn_rounds >= bbr_param(sk, full_ecn_cnt)) { + bbr_handle_queue_too_high_in_startup(sk); + return; + } +} + +/* Updates ecn_alpha and returns ce_ratio. -1 if not available. */ +static int bbr_update_ecn_alpha(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); + struct bbr *bbr = inet_csk_ca(sk); + s32 delivered, delivered_ce; + u64 alpha, ce_ratio; + u32 gain; + bool want_ecn_alpha; + + /* See if we should use ECN sender logic for this connection. */ + if (!bbr->ecn_eligible && bbr_can_use_ecn(sk) && + bbr_param(sk, ecn_factor) && + (bbr->min_rtt_us <= bbr_ecn_max_rtt_us || + !bbr_ecn_max_rtt_us)) + bbr->ecn_eligible = 1; + + /* Skip updating alpha only if not ECN-eligible and PLB is disabled. */ + want_ecn_alpha = (bbr->ecn_eligible || + (bbr_can_use_ecn(sk) && + READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled))); + if (!want_ecn_alpha) + return -1; + + delivered = tp->delivered - bbr->alpha_last_delivered; + delivered_ce = tp->delivered_ce - bbr->alpha_last_delivered_ce; + + if (delivered == 0 || /* avoid divide by zero */ + WARN_ON_ONCE(delivered < 0 || delivered_ce < 0)) /* backwards? 
*/ + return -1; + + BUILD_BUG_ON(BBR_SCALE != TCP_PLB_SCALE); + ce_ratio = (u64)delivered_ce << BBR_SCALE; + do_div(ce_ratio, delivered); + + gain = bbr_param(sk, ecn_alpha_gain); + alpha = ((BBR_UNIT - gain) * bbr->ecn_alpha) >> BBR_SCALE; + alpha += (gain * ce_ratio) >> BBR_SCALE; + bbr->ecn_alpha = min_t(u32, alpha, BBR_UNIT); + + bbr->alpha_last_delivered = tp->delivered; + bbr->alpha_last_delivered_ce = tp->delivered_ce; + + bbr_check_ecn_too_high_in_startup(sk, ce_ratio); + return (int)ce_ratio; } -/* In theory BBR does not need to undo the cwnd since it does not - * always reduce cwnd on losses (see bbr_main()). Keep it for now. +/* Protective Load Balancing (PLB). PLB rehashes outgoing data (to a new IPv6 + * flow label) if it encounters sustained congestion in the form of ECN marks. */ -__bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk) +static void bbr_plb(struct sock *sk, const struct rate_sample *rs, int ce_ratio) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->round_start && ce_ratio >= 0) + tcp_plb_update_state(sk, &bbr->plb, ce_ratio); + + tcp_plb_check_rehash(sk, &bbr->plb); +} + +/* Each round trip of BBR_BW_PROBE_UP, double volume of probing data. */ +static void bbr_raise_inflight_hi_slope(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 growth_this_round, cnt; + + /* Calculate "slope": packets S/Acked per inflight_hi increment. */ + growth_this_round = 1 << bbr->bw_probe_up_rounds; + bbr->bw_probe_up_rounds = min(bbr->bw_probe_up_rounds + 1, 30); + cnt = tcp_snd_cwnd(tp) / growth_this_round; + cnt = max(cnt, 1U); + bbr->bw_probe_up_cnt = cnt; +} + +/* In BBR_BW_PROBE_UP, not seeing high loss/ECN/queue, so raise inflight_hi. */ +static void bbr_probe_inflight_hi_upward(struct sock *sk, + const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 delta; + + if (!tp->is_cwnd_limited || tcp_snd_cwnd(tp) < bbr->inflight_hi) + return; /* not fully using inflight_hi, so don't grow it */ + + /* For each bw_probe_up_cnt packets ACKed, increase inflight_hi by 1. */ + bbr->bw_probe_up_acks += rs->acked_sacked; + if (bbr->bw_probe_up_acks >= bbr->bw_probe_up_cnt) { + delta = bbr->bw_probe_up_acks / bbr->bw_probe_up_cnt; + bbr->bw_probe_up_acks -= delta * bbr->bw_probe_up_cnt; + bbr->inflight_hi += delta; + bbr->try_fast_path = 0; /* Need to update cwnd */ + } + + if (bbr->round_start) + bbr_raise_inflight_hi_slope(sk); +} + +/* Does loss/ECN rate for this sample say inflight is "too high"? + * This is used by both the bbr_check_loss_too_high_in_startup() function, + * which can be used in either v1 or v2, and the PROBE_UP phase of v2, which + * uses it to notice when loss/ECN rates suggest inflight is too high. + */ +static bool bbr_is_inflight_too_high(const struct sock *sk, + const struct rate_sample *rs) +{ + const struct bbr *bbr = inet_csk_ca(sk); + u32 loss_thresh, ecn_thresh; + + if (rs->lost > 0 && rs->tx_in_flight) { + loss_thresh = (u64)rs->tx_in_flight * bbr_param(sk, loss_thresh) >> + BBR_SCALE; + if (rs->lost > loss_thresh) { + return true; + } + } + + if (rs->delivered_ce > 0 && rs->delivered > 0 && + bbr->ecn_eligible && bbr_param(sk, ecn_thresh)) { + ecn_thresh = (u64)rs->delivered * bbr_param(sk, ecn_thresh) >> + BBR_SCALE; + if (rs->delivered_ce > ecn_thresh) { + return true; + } + } + + return false; +} + +/* Calculate the tx_in_flight level that corresponded to excessive loss. 
+ * We find "lost_prefix" segs of the skb where loss rate went too high,
+ * by solving for "lost_prefix" in the following equation:
+ *   lost / inflight >= loss_thresh
+ *  (lost_prev + lost_prefix) / (inflight_prev + lost_prefix) >= loss_thresh
+ * Then we take that equation, convert it to fixed point, and
+ * round up to the nearest packet.
+ */
+static u32 bbr_inflight_hi_from_lost_skb(const struct sock *sk,
+ const struct rate_sample *rs,
+ const struct sk_buff *skb)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ u32 loss_thresh = bbr_param(sk, loss_thresh);
+ u32 pcount, divisor, inflight_hi;
+ s32 inflight_prev, lost_prev;
+ u64 loss_budget, lost_prefix;
+
+ pcount = tcp_skb_pcount(skb);
+
+ /* How much data was in flight before this skb? */
+ inflight_prev = rs->tx_in_flight - pcount;
+ if (inflight_prev < 0) {
+ WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious(
+ pcount,
+ TCP_SKB_CB(skb)->sacked,
+ rs->tx_in_flight),
+ "tx_in_flight: %u pcount: %u reneg: %u",
+ rs->tx_in_flight, pcount, tcp_sk(sk)->is_sack_reneg);
+ return ~0U;
+ }
+
+ /* How much inflight data was marked lost before this skb? */
+ lost_prev = rs->lost - pcount;
+ if (WARN_ONCE(lost_prev < 0,
+ "cwnd: %u ca: %d out: %u lost: %u pif: %u "
+ "tx_in_flight: %u tx.lost: %u tp->lost: %u rs->lost: %d "
+ "lost_prev: %d pcount: %d seq: %u end_seq: %u reneg: %u",
+ tcp_snd_cwnd(tp), inet_csk(sk)->icsk_ca_state,
+ tp->packets_out, tp->lost_out, tcp_packets_in_flight(tp),
+ rs->tx_in_flight, TCP_SKB_CB(skb)->tx.lost, tp->lost,
+ rs->lost, lost_prev, pcount,
+ TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
+ tp->is_sack_reneg))
+ return ~0U;
+
+ /* At what prefix of this lost skb did loss rate exceed loss_thresh? */
+ loss_budget = (u64)inflight_prev * loss_thresh + BBR_UNIT - 1;
+ loss_budget >>= BBR_SCALE;
+ if (lost_prev >= loss_budget) {
+ lost_prefix = 0; /* previous losses crossed loss_thresh */
+ } else {
+ lost_prefix = loss_budget - lost_prev;
+ lost_prefix <<= BBR_SCALE;
+ divisor = BBR_UNIT - loss_thresh;
+ if (WARN_ON_ONCE(!divisor)) /* loss_thresh is 8 bits */
+ return ~0U;
+ do_div(lost_prefix, divisor);
+ }
+
+ inflight_hi = inflight_prev + lost_prefix;
+ return inflight_hi;
+}
+
+/* If loss/ECN rates during probing indicated we may have overfilled a
+ * buffer, return an operating point that tries to leave unutilized headroom in
+ * the path for other flows, for fairness convergence and lower RTTs and loss.
+ */
+static u32 bbr_inflight_with_headroom(const struct sock *sk)
+{
+ struct bbr *bbr = inet_csk_ca(sk);
+ u32 headroom, headroom_fraction;
+
+ if (bbr->inflight_hi == ~0U)
+ return ~0U;
+
+ headroom_fraction = bbr_param(sk, inflight_headroom);
+ headroom = ((u64)bbr->inflight_hi * headroom_fraction) >> BBR_SCALE;
+ headroom = max(headroom, 1U);
+ return max_t(s32, bbr->inflight_hi - headroom,
+ bbr_param(sk, cwnd_min_target));
+}
+
+/* Bound cwnd to a sensible level, based on our current probing state
+ * machine phase and model of a good inflight level (inflight_lo, inflight_hi).
+ */
+static void bbr_bound_cwnd_for_inflight_model(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ u32 cap;
+
+ /* tcp_rcv_synsent_state_process() currently calls tcp_ack()
+ * and thus cong_control() without first initializing us(!).
+ */
+ if (!bbr->initialized)
+ return;
+
+ cap = ~0U;
+ if (bbr->mode == BBR_PROBE_BW &&
+ bbr->cycle_idx != BBR_BW_PROBE_CRUISE) {
+ /* Probe to see if more packets fit in the path.
*/ + cap = bbr->inflight_hi; + } else { + if (bbr->mode == BBR_PROBE_RTT || + (bbr->mode == BBR_PROBE_BW && + bbr->cycle_idx == BBR_BW_PROBE_CRUISE)) + cap = bbr_inflight_with_headroom(sk); + } + /* Adapt to any loss/ECN since our last bw probe. */ + cap = min(cap, bbr->inflight_lo); + + cap = max_t(u32, cap, bbr_param(sk, cwnd_min_target)); + tcp_snd_cwnd_set(tp, min(cap, tcp_snd_cwnd(tp))); +} + +/* How should we multiplicatively cut bw or inflight limits based on ECN? */ +static u32 bbr_ecn_cut(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return BBR_UNIT - + ((bbr->ecn_alpha * bbr_param(sk, ecn_factor)) >> BBR_SCALE); +} + +/* Init lower bounds if have not inited yet. */ +static void bbr_init_lower_bounds(struct sock *sk, bool init_bw) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (init_bw && bbr->bw_lo == ~0U) + bbr->bw_lo = bbr_max_bw(sk); + if (bbr->inflight_lo == ~0U) + bbr->inflight_lo = tcp_snd_cwnd(tp); +} + +/* Reduce bw and inflight to (1 - beta). */ +static void bbr_loss_lower_bounds(struct sock *sk, u32 *bw, u32 *inflight) +{ + struct bbr* bbr = inet_csk_ca(sk); + u32 loss_cut = BBR_UNIT - bbr_param(sk, beta); + + *bw = max_t(u32, bbr->bw_latest, + (u64)bbr->bw_lo * loss_cut >> BBR_SCALE); + *inflight = max_t(u32, bbr->inflight_latest, + (u64)bbr->inflight_lo * loss_cut >> BBR_SCALE); +} + +/* Reduce inflight to (1 - alpha*ecn_factor). */ +static void bbr_ecn_lower_bounds(struct sock *sk, u32 *inflight) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 ecn_cut = bbr_ecn_cut(sk); + + *inflight = (u64)bbr->inflight_lo * ecn_cut >> BBR_SCALE; +} + +/* Estimate a short-term lower bound on the capacity available now, based + * on measurements of the current delivery process and recent history. When we + * are seeing loss/ECN at times when we are not probing bw, then conservatively + * move toward flow balance by multiplicatively cutting our short-term + * estimated safe rate and volume of data (bw_lo and inflight_lo). We use a + * multiplicative decrease in order to converge to a lower capacity in time + * logarithmic in the magnitude of the decrease. + * + * However, we do not cut our short-term estimates lower than the current rate + * and volume of delivered data from this round trip, since from the current + * delivery process we can estimate the measured capacity available now. + * + * Anything faster than that approach would knowingly risk high loss, which can + * cause low bw for Reno/CUBIC and high loss recovery latency for + * request/response flows using any congestion control. + */ +static void bbr_adapt_lower_bounds(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 ecn_inflight_lo = ~0U; + + /* We only use lower-bound estimates when not probing bw. + * When probing we need to push inflight higher to probe bw. + */ + if (bbr_is_probing_bandwidth(sk)) + return; + + /* ECN response. */ + if (bbr->ecn_in_round && bbr_param(sk, ecn_factor)) { + bbr_init_lower_bounds(sk, false); + bbr_ecn_lower_bounds(sk, &ecn_inflight_lo); + } + + /* Loss response. */ + if (bbr->loss_in_round) { + bbr_init_lower_bounds(sk, true); + bbr_loss_lower_bounds(sk, &bbr->bw_lo, &bbr->inflight_lo); + } + + /* Adjust to the lower of the levels implied by loss/ECN. */ + bbr->inflight_lo = min(bbr->inflight_lo, ecn_inflight_lo); + bbr->bw_lo = max(1U, bbr->bw_lo); +} + +/* Reset any short-term lower-bound adaptation to congestion, so that we can + * push our inflight up. 
+ */ +static void bbr_reset_lower_bounds(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->bw_lo = ~0U; + bbr->inflight_lo = ~0U; +} + +/* After bw probing (STARTUP/PROBE_UP), reset signals before entering a state + * machine phase where we adapt our lower bound based on congestion signals. + */ +static void bbr_reset_congestion_signals(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->loss_in_round = 0; + bbr->ecn_in_round = 0; + bbr->loss_in_cycle = 0; + bbr->ecn_in_cycle = 0; + bbr->bw_latest = 0; + bbr->inflight_latest = 0; +} + +static void bbr_exit_loss_recovery(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd)); + bbr->try_fast_path = 0; /* bound cwnd using latest model */ +} + +/* Update rate and volume of delivered data from latest round trip. */ +static void bbr_update_latest_delivery_signals( + struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->loss_round_start = 0; + if (rs->interval_us <= 0 || !rs->acked_sacked) + return; /* Not a valid observation */ + + bbr->bw_latest = max_t(u32, bbr->bw_latest, ctx->sample_bw); + bbr->inflight_latest = max_t(u32, bbr->inflight_latest, rs->delivered); + + if (!before(rs->prior_delivered, bbr->loss_round_delivered)) { + bbr->loss_round_delivered = tp->delivered; + bbr->loss_round_start = 1; /* mark start of new round trip */ + } +} + +/* Once per round, reset filter for latest rate and volume of delivered data. */ +static void bbr_advance_latest_delivery_signals( + struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + + /* If ACK matches a TLP retransmit, persist the filter. If we detect + * that a TLP retransmit plugged a tail loss, we'll want to remember + * how much data the path delivered before the tail loss. + */ + if (bbr->loss_round_start && !rs->is_acking_tlp_retrans_seq) { + bbr->bw_latest = ctx->sample_bw; + bbr->inflight_latest = rs->delivered; + } +} + +/* Update (most of) our congestion signals: track the recent rate and volume of + * delivered data, presence of loss, and EWMA degree of ECN marking. + */ +static void bbr_update_congestion_signals( + struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) { struct bbr *bbr = inet_csk_ca(sk); + u64 bw; + + if (rs->interval_us <= 0 || !rs->acked_sacked) + return; /* Not a valid observation */ + bw = ctx->sample_bw; - bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ + if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) + bbr_take_max_bw_sample(sk, bw); + + bbr->loss_in_round |= (rs->losses > 0); + + if (!bbr->loss_round_start) + return; /* skip the per-round-trip updates */ + /* Now do per-round-trip updates. */ + bbr_adapt_lower_bounds(sk, rs); + + bbr->loss_in_round = 0; + bbr->ecn_in_round = 0; +} + +/* Bandwidth probing can cause loss. To help coexistence with loss-based + * congestion control we spread out our probing in a Reno-conscious way. Due to + * the shape of the Reno sawtooth, the time required between loss epochs for an + * idealized Reno flow is a number of round trips that is the BDP of that + * flow. We count packet-timed round trips directly, since measured RTT can + * vary widely, and Reno is driven by packet-timed round trips. 
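+ * Concretely, the helper below forces a probe once rounds_since_probe
+ * reaches min(bw_probe_max_rounds, bbr_target_inflight()); e.g. with a
+ * target inflight (estimated BDP) of 62 packets we probe at least once
+ * every ~62 packet-timed round trips.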
+ */ +static bool bbr_is_reno_coexistence_probe_time(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 rounds; + + /* Random loss can shave some small percentage off of our inflight + * in each round. To survive this, flows need robust periodic probes. + */ + rounds = min_t(u32, bbr_param(sk, bw_probe_max_rounds), bbr_target_inflight(sk)); + return bbr->rounds_since_probe >= rounds; +} + +/* How long do we want to wait before probing for bandwidth (and risking + * loss)? We randomize the wait, for better mixing and fairness convergence. + * + * We bound the Reno-coexistence inter-bw-probe time to be 62-63 round trips. + * This is calculated to allow fairness with a 25Mbps, 30ms Reno flow, + * (eg 4K video to a broadband user): + * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets + * + * We bound the BBR-native inter-bw-probe wall clock time to be: + * (a) higher than 2 sec: to try to avoid causing loss for a long enough time + * to allow Reno at 30ms to get 4K video bw, the inter-bw-probe time must + * be at least: 25Mbps * .030sec / (1514bytes) * 0.030sec = 1.9secs + * (b) lower than 3 sec: to ensure flows can start probing in a reasonable + * amount of time to discover unutilized bw on human-scale interactive + * time-scales (e.g. perhaps traffic from a web page download that we + * were competing with is now complete). + */ +static void bbr_pick_probe_wait(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + /* Decide the random round-trip bound for wait until probe: */ + bbr->rounds_since_probe = + get_random_u32_below(bbr_param(sk, bw_probe_rand_rounds)); + /* Decide the random wall clock bound for wait until probe: */ + bbr->probe_wait_us = bbr_param(sk, bw_probe_base_us) + + get_random_u32_below(bbr_param(sk, bw_probe_rand_us)); +} + +static void bbr_set_cycle_idx(struct sock *sk, int cycle_idx) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->cycle_idx = cycle_idx; + /* New phase, so need to update cwnd and pacing rate. */ + bbr->try_fast_path = 0; +} + +/* Send at estimated bw to fill the pipe, but not queue. We need this phase + * before PROBE_UP, because as soon as we send faster than the available bw + * we will start building a queue, and if the buffer is shallow we can cause + * loss. If we do not fill the pipe before we cause this loss, our bw_hi and + * inflight_hi estimates will underestimate. + */ +static void bbr_start_bw_probe_refill(struct sock *sk, u32 bw_probe_up_rounds) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr_reset_lower_bounds(sk); + bbr->bw_probe_up_rounds = bw_probe_up_rounds; + bbr->bw_probe_up_acks = 0; + bbr->stopped_risky_probe = 0; + bbr->ack_phase = BBR_ACKS_REFILLING; + bbr->next_rtt_delivered = tp->delivered; + bbr_set_cycle_idx(sk, BBR_BW_PROBE_REFILL); +} + +/* Now probe max deliverable data rate and volume. */ +static void bbr_start_bw_probe_up(struct sock *sk, struct bbr_context *ctx) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->ack_phase = BBR_ACKS_PROBE_STARTING; + bbr->next_rtt_delivered = tp->delivered; + bbr->cycle_mstamp = tp->tcp_mstamp; + bbr_reset_full_bw(sk); + bbr->full_bw = ctx->sample_bw; + bbr_set_cycle_idx(sk, BBR_BW_PROBE_UP); + bbr_raise_inflight_hi_slope(sk); +} + +/* Start a new PROBE_BW probing cycle of some wall clock length. Pick a wall + * clock time at which to probe beyond an inflight that we think to be + * safe. This will knowingly risk packet loss, so we want to do this rarely, to + * keep packet loss rates low. 
Also start a round-trip counter, to probe faster + * if we estimate a Reno flow at our BDP would probe faster. + */ +static void bbr_start_bw_probe_down(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr_reset_congestion_signals(sk); + bbr->bw_probe_up_cnt = ~0U; /* not growing inflight_hi any more */ + bbr_pick_probe_wait(sk); + bbr->cycle_mstamp = tp->tcp_mstamp; /* start wall clock */ + bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; + bbr->next_rtt_delivered = tp->delivered; + bbr_set_cycle_idx(sk, BBR_BW_PROBE_DOWN); +} + +/* Cruise: maintain what we estimate to be a neutral, conservative + * operating point, without attempting to probe up for bandwidth or down for + * RTT, and only reducing inflight in response to loss/ECN signals. + */ +static void bbr_start_bw_probe_cruise(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->inflight_lo != ~0U) + bbr->inflight_lo = min(bbr->inflight_lo, bbr->inflight_hi); + + bbr_set_cycle_idx(sk, BBR_BW_PROBE_CRUISE); +} + +/* Loss and/or ECN rate is too high while probing. + * Adapt (once per bw probe) by cutting inflight_hi and then restarting cycle. + */ +static void bbr_handle_inflight_too_high(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + const u32 beta = bbr_param(sk, beta); + + bbr->prev_probe_too_high = 1; + bbr->bw_probe_samples = 0; /* only react once per probe */ + /* If we are app-limited then we are not robustly + * probing the max volume of inflight data we think + * might be safe (analogous to how app-limited bw + * samples are not known to be robustly probing bw). + */ + if (!rs->is_app_limited) { + bbr->inflight_hi = max_t(u32, rs->tx_in_flight, + (u64)bbr_target_inflight(sk) * + (BBR_UNIT - beta) >> BBR_SCALE); + } + if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) + bbr_start_bw_probe_down(sk); +} + +/* If we're seeing bw and loss samples reflecting our bw probing, adapt + * using the signals we see. If loss or ECN mark rate gets too high, then adapt + * inflight_hi downward. If we're able to push inflight higher without such + * signals, push higher: adapt inflight_hi upward. + */ +static bool bbr_adapt_upper_bounds(struct sock *sk, + const struct rate_sample *rs, + struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + + /* Track when we'll see bw/loss samples resulting from our bw probes. */ + if (bbr->ack_phase == BBR_ACKS_PROBE_STARTING && bbr->round_start) + bbr->ack_phase = BBR_ACKS_PROBE_FEEDBACK; + if (bbr->ack_phase == BBR_ACKS_PROBE_STOPPING && bbr->round_start) { + /* End of samples from bw probing phase. */ + bbr->bw_probe_samples = 0; + bbr->ack_phase = BBR_ACKS_INIT; + /* At this point in the cycle, our current bw sample is also + * our best recent chance at finding the highest available bw + * for this flow. So now is the best time to forget the bw + * samples from the previous cycle, by advancing the window. + */ + if (bbr->mode == BBR_PROBE_BW && !rs->is_app_limited) + bbr_advance_max_bw_filter(sk); + /* If we had an inflight_hi, then probed and pushed inflight all + * the way up to hit that inflight_hi without seeing any + * high loss/ECN in all the resulting ACKs from that probing, + * then probe up again, this time letting inflight persist at + * inflight_hi for a round trip, then accelerating beyond. 
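+ * That is the bbr_start_bw_probe_refill(sk, 0) call just below: we go
+ * straight back to REFILL rather than waiting out the usual inter-probe
+ * timers in bbr_check_time_to_probe_bw().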
+ */ + if (bbr->mode == BBR_PROBE_BW && + bbr->stopped_risky_probe && !bbr->prev_probe_too_high) { + bbr_start_bw_probe_refill(sk, 0); + return true; /* yes, decided state transition */ + } + } + if (bbr_is_inflight_too_high(sk, rs)) { + if (bbr->bw_probe_samples) /* sample is from bw probing? */ + bbr_handle_inflight_too_high(sk, rs); + } else { + /* Loss/ECN rate is declared safe. Adjust upper bound upward. */ + + if (bbr->inflight_hi == ~0U) + return false; /* no excess queue signals yet */ + + /* To be resilient to random loss, we must raise bw/inflight_hi + * if we observe in any phase that a higher level is safe. + */ + if (rs->tx_in_flight > bbr->inflight_hi) { + bbr->inflight_hi = rs->tx_in_flight; + } + + if (bbr->mode == BBR_PROBE_BW && + bbr->cycle_idx == BBR_BW_PROBE_UP) + bbr_probe_inflight_hi_upward(sk, rs); + } + + return false; +} + +/* Check if it's time to probe for bandwidth now, and if so, kick it off. */ +static bool bbr_check_time_to_probe_bw(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 n; + + /* If we seem to be at an operating point where we are not seeing loss + * but we are seeing ECN marks, then when the ECN marks cease we reprobe + * quickly (in case cross-traffic has ceased and freed up bw). + */ + if (bbr_param(sk, ecn_reprobe_gain) && bbr->ecn_eligible && + bbr->ecn_in_cycle && !bbr->loss_in_cycle && + inet_csk(sk)->icsk_ca_state == TCP_CA_Open) { + /* Calculate n so that when bbr_raise_inflight_hi_slope() + * computes growth_this_round as 2^n it will be roughly the + * desired volume of data (inflight_hi*ecn_reprobe_gain). + */ + n = ilog2((((u64)bbr->inflight_hi * + bbr_param(sk, ecn_reprobe_gain)) >> BBR_SCALE)); + bbr_start_bw_probe_refill(sk, n); + return true; + } + + if (bbr_has_elapsed_in_phase(sk, bbr->probe_wait_us) || + bbr_is_reno_coexistence_probe_time(sk)) { + bbr_start_bw_probe_refill(sk, 0); + return true; + } + return false; +} + +/* Is it time to transition from PROBE_DOWN to PROBE_CRUISE? */ +static bool bbr_check_time_to_cruise(struct sock *sk, u32 inflight, u32 bw) +{ + /* Always need to pull inflight down to leave headroom in queue. */ + if (inflight > bbr_inflight_with_headroom(sk)) + return false; + + return inflight <= bbr_inflight(sk, bw, BBR_UNIT); +} + +/* PROBE_BW state machine: cruise, refill, probe for bw, or drain? */ +static void bbr_update_cycle_phase(struct sock *sk, + const struct rate_sample *rs, + struct bbr_context *ctx) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + bool is_bw_probe_done = false; + u32 inflight, bw; + + if (!bbr_full_bw_reached(sk)) + return; + + /* In DRAIN, PROBE_BW, or PROBE_RTT, adjust upper bounds. */ + if (bbr_adapt_upper_bounds(sk, rs, ctx)) + return; /* already decided state transition */ + + if (bbr->mode != BBR_PROBE_BW) + return; + + inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight); + bw = bbr_max_bw(sk); + + switch (bbr->cycle_idx) { + /* First we spend most of our time cruising with a pacing_gain of 1.0, + * which paces at the estimated bw, to try to fully use the pipe + * without building queue. If we encounter loss/ECN marks, we adapt + * by slowing down. + */ + case BBR_BW_PROBE_CRUISE: + if (bbr_check_time_to_probe_bw(sk, rs)) + return; /* already decided state transition */ + break; + + /* After cruising, when it's time to probe, we first "refill": we send + * at the estimated bw to fill the pipe, before probing higher and + * knowingly risking overflowing the bottleneck buffer (causing loss). 
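+ * Once a full round trip of REFILL-paced data has been ACKed
+ * (bbr->round_start below), we switch to PROBE_UP via
+ * bbr_start_bw_probe_up().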
+ */ + case BBR_BW_PROBE_REFILL: + if (bbr->round_start) { + /* After one full round trip of sending in REFILL, we + * start to see bw samples reflecting our REFILL, which + * may be putting too much data in flight. + */ + bbr->bw_probe_samples = 1; + bbr_start_bw_probe_up(sk, ctx); + } + break; + + /* After we refill the pipe, we probe by using a pacing_gain > 1.0, to + * probe for bw. If we have not seen loss/ECN, we try to raise inflight + * to at least pacing_gain*BDP; note that this may take more than + * min_rtt if min_rtt is small (e.g. on a LAN). + * + * We terminate PROBE_UP bandwidth probing upon any of the following: + * + * (1) We've pushed inflight up to hit the inflight_hi target set in the + * most recent previous bw probe phase. Thus we want to start + * draining the queue immediately because it's very likely the most + * recently sent packets will fill the queue and cause drops. + * (2) If inflight_hi has not limited bandwidth growth recently, and + * yet delivered bandwidth has not increased much recently + * (bbr->full_bw_now). + * (3) Loss filter says loss rate is "too high". + * (4) ECN filter says ECN mark rate is "too high". + * + * (1) (2) checked here, (3) (4) checked in bbr_is_inflight_too_high() + */ + case BBR_BW_PROBE_UP: + if (bbr->prev_probe_too_high && + inflight >= bbr->inflight_hi) { + bbr->stopped_risky_probe = 1; + is_bw_probe_done = true; + } else { + if (tp->is_cwnd_limited && + tcp_snd_cwnd(tp) >= bbr->inflight_hi) { + /* inflight_hi is limiting bw growth */ + bbr_reset_full_bw(sk); + bbr->full_bw = ctx->sample_bw; + } else if (bbr->full_bw_now) { + /* Plateau in estimated bw. Pipe looks full. */ + is_bw_probe_done = true; + } + } + if (is_bw_probe_done) { + bbr->prev_probe_too_high = 0; /* no loss/ECN (yet) */ + bbr_start_bw_probe_down(sk); /* restart w/ down */ + } + break; + + /* After probing in PROBE_UP, we have usually accumulated some data in + * the bottleneck buffer (if bw probing didn't find more bw). We next + * enter PROBE_DOWN to try to drain any excess data from the queue. To + * do this, we use a pacing_gain < 1.0. We hold this pacing gain until + * our inflight is less then that target cruising point, which is the + * minimum of (a) the amount needed to leave headroom, and (b) the + * estimated BDP. Once inflight falls to match the target, we estimate + * the queue is drained; persisting would underutilize the pipe. + */ + case BBR_BW_PROBE_DOWN: + if (bbr_check_time_to_probe_bw(sk, rs)) + return; /* already decided state transition */ + if (bbr_check_time_to_cruise(sk, inflight, bw)) + bbr_start_bw_probe_cruise(sk); + break; + + default: + WARN_ONCE(1, "BBR invalid cycle index %u\n", bbr->cycle_idx); + } +} + +/* Exiting PROBE_RTT, so return to bandwidth probing in STARTUP or PROBE_BW. */ +static void bbr_exit_probe_rtt(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr_reset_lower_bounds(sk); + if (bbr_full_bw_reached(sk)) { + bbr->mode = BBR_PROBE_BW; + /* Raising inflight after PROBE_RTT may cause loss, so reset + * the PROBE_BW clock and schedule the next bandwidth probe for + * a friendly and randomized future point in time. + */ + bbr_start_bw_probe_down(sk); + /* Since we are exiting PROBE_RTT, we know inflight is + * below our estimated BDP, so it is reasonable to cruise. + */ + bbr_start_bw_probe_cruise(sk); + } else { + bbr->mode = BBR_STARTUP; + } +} + +/* Exit STARTUP based on loss rate > 1% and loss gaps in round >= N. 
Wait until + * the end of the round in recovery to get a good estimate of how many packets + * have been lost, and how many we need to drain with a low pacing rate. + */ +static void bbr_check_loss_too_high_in_startup(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr_full_bw_reached(sk)) + return; + + /* For STARTUP exit, check the loss rate at the end of each round trip + * of Recovery episodes in STARTUP. We check the loss rate at the end + * of the round trip to filter out noisy/low loss and have a better + * sense of inflight (extent of loss), so we can drain more accurately. + */ + if (rs->losses && bbr->loss_events_in_round < 0xf) + bbr->loss_events_in_round++; /* update saturating counter */ + if (bbr_param(sk, full_loss_cnt) && bbr->loss_round_start && + inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery && + bbr->loss_events_in_round >= bbr_param(sk, full_loss_cnt) && + bbr_is_inflight_too_high(sk, rs)) { + bbr_handle_queue_too_high_in_startup(sk); + return; + } + if (bbr->loss_round_start) + bbr->loss_events_in_round = 0; +} + +/* Estimate when the pipe is full, using the change in delivery rate: BBR + * estimates bw probing filled the pipe if the estimated bw hasn't changed by + * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited + * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the + * higher rwin, 3: we get higher delivery rate samples. Or transient + * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar + * design goal, but uses delay and inter-ACK spacing instead of bandwidth. + */ +static void bbr_check_full_bw_reached(struct sock *sk, + const struct rate_sample *rs, + struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bw_thresh, full_cnt, thresh; + + if (bbr->full_bw_now || rs->is_app_limited) + return; + + thresh = bbr_param(sk, full_bw_thresh); + full_cnt = bbr_param(sk, full_bw_cnt); + bw_thresh = (u64)bbr->full_bw * thresh >> BBR_SCALE; + if (ctx->sample_bw >= bw_thresh) { + bbr_reset_full_bw(sk); + bbr->full_bw = ctx->sample_bw; + return; + } + if (!bbr->round_start) + return; + ++bbr->full_bw_cnt; + bbr->full_bw_now = bbr->full_bw_cnt >= full_cnt; + bbr->full_bw_reached |= bbr->full_bw_now; +} + +/* If pipe is probably full, drain the queue and then enter steady-state. */ +static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs, + struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { + bbr->mode = BBR_DRAIN; /* drain queue we created */ + /* Set ssthresh to export purely for monitoring, to signal + * completion of initial STARTUP by setting to a non- + * TCP_INFINITE_SSTHRESH value (ssthresh is not used by BBR). 
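+ * The exported value is the estimated BDP (bbr_inflight() at the max bw
+ * estimate with a gain of BBR_UNIT), which observers see as the
+ * connection's ssthresh in tcp_info / ss output.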
+ */ + tcp_sk(sk)->snd_ssthresh = + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); + bbr_reset_congestion_signals(sk); + } /* fall through to check if in-flight is already small: */ + if (bbr->mode == BBR_DRAIN && + bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) { + bbr->mode = BBR_PROBE_BW; + bbr_start_bw_probe_down(sk); + } +} + +static void bbr_update_model(struct sock *sk, const struct rate_sample *rs, + struct bbr_context *ctx) +{ + bbr_update_congestion_signals(sk, rs, ctx); + bbr_update_ack_aggregation(sk, rs); + bbr_check_loss_too_high_in_startup(sk, rs); + bbr_check_full_bw_reached(sk, rs, ctx); + bbr_check_drain(sk, rs, ctx); + bbr_update_cycle_phase(sk, rs, ctx); + bbr_update_min_rtt(sk, rs); +} + +/* Fast path for app-limited case. + * + * On each ack, we execute bbr state machine, which primarily consists of: + * 1) update model based on new rate sample, and + * 2) update control based on updated model or state change. + * + * There are certain workload/scenarios, e.g. app-limited case, where + * either we can skip updating model or we can skip update of both model + * as well as control. This provides signifcant softirq cpu savings for + * processing incoming acks. + * + * In case of app-limited, if there is no congestion (loss/ecn) and + * if observed bw sample is less than current estimated bw, then we can + * skip some of the computation in bbr state processing: + * + * - if there is no rtt/mode/phase change: In this case, since all the + * parameters of the network model are constant, we can skip model + * as well control update. + * + * - else we can skip rest of the model update. But we still need to + * update the control to account for the new rtt/mode/phase. + * + * Returns whether we can take fast path or not. 
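+ *
+ * Concretely, the checks below require: the fast_path parameter enabled,
+ * bbr->try_fast_path set, an application-limited rate sample whose bw is
+ * below the current max bw estimate, and no loss or ECN seen so far in
+ * this round.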
+ */ +static bool bbr_run_fast_path(struct sock *sk, bool *update_model, + const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 prev_min_rtt_us, prev_mode; + + if (bbr_param(sk, fast_path) && bbr->try_fast_path && + rs->is_app_limited && ctx->sample_bw < bbr_max_bw(sk) && + !bbr->loss_in_round && !bbr->ecn_in_round ) { + prev_mode = bbr->mode; + prev_min_rtt_us = bbr->min_rtt_us; + bbr_check_drain(sk, rs, ctx); + bbr_update_cycle_phase(sk, rs, ctx); + bbr_update_min_rtt(sk, rs); + + if (bbr->mode == prev_mode && + bbr->min_rtt_us == prev_min_rtt_us && + bbr->try_fast_path) { + return true; + } + + /* Skip model update, but control still needs to be updated */ + *update_model = false; + } + return false; +} + +__bpf_kfunc static void bbr_main(struct sock *sk, const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + struct bbr_context ctx = { 0 }; + bool update_model = true; + u32 bw, round_delivered; + int ce_ratio = -1; + + round_delivered = bbr_update_round_start(sk, rs, &ctx); + if (bbr->round_start) { + bbr->rounds_since_probe = + min_t(s32, bbr->rounds_since_probe + 1, 0xFF); + ce_ratio = bbr_update_ecn_alpha(sk); + } + bbr_plb(sk, rs, ce_ratio); + + bbr->ecn_in_round |= (bbr->ecn_eligible && rs->is_ece); + bbr_calculate_bw_sample(sk, rs, &ctx); + bbr_update_latest_delivery_signals(sk, rs, &ctx); + + if (bbr_run_fast_path(sk, &update_model, rs, &ctx)) + goto out; + + if (update_model) + bbr_update_model(sk, rs, &ctx); + + bbr_update_gains(sk); + bw = bbr_bw(sk); + bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); + bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain, + tcp_snd_cwnd(tp), &ctx); + bbr_bound_cwnd_for_inflight_model(sk); + +out: + bbr_advance_latest_delivery_signals(sk, rs, &ctx); + bbr->prev_ca_state = inet_csk(sk)->icsk_ca_state; + bbr->loss_in_cycle |= rs->lost > 0; + bbr->ecn_in_cycle |= rs->delivered_ce > 0; +} + +__bpf_kfunc static void bbr_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->initialized = 1; + + bbr->init_cwnd = min(0x7FU, tcp_snd_cwnd(tp)); + bbr->prior_cwnd = tp->prior_cwnd; + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + bbr->next_rtt_delivered = tp->delivered; + bbr->prev_ca_state = TCP_CA_Open; + + bbr->probe_rtt_done_stamp = 0; + bbr->probe_rtt_round_done = 0; + bbr->probe_rtt_min_us = tcp_min_rtt(tp); + bbr->probe_rtt_min_stamp = tcp_jiffies32; + bbr->min_rtt_us = tcp_min_rtt(tp); + bbr->min_rtt_stamp = tcp_jiffies32; + + bbr->has_seen_rtt = 0; + bbr_init_pacing_rate_from_rtt(sk); + + bbr->round_start = 0; + bbr->idle_restart = 0; + bbr->full_bw_reached = 0; + bbr->full_bw = 0; bbr->full_bw_cnt = 0; - bbr_reset_lt_bw_sampling(sk); - return tcp_snd_cwnd(tcp_sk(sk)); + bbr->cycle_mstamp = 0; + bbr->cycle_idx = 0; + + bbr_reset_startup_mode(sk); + + bbr->ack_epoch_mstamp = tp->tcp_mstamp; + bbr->ack_epoch_acked = 0; + bbr->extra_acked_win_rtts = 0; + bbr->extra_acked_win_idx = 0; + bbr->extra_acked[0] = 0; + bbr->extra_acked[1] = 0; + + bbr->ce_state = 0; + bbr->prior_rcv_nxt = tp->rcv_nxt; + bbr->try_fast_path = 0; + + cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); + + /* Start sampling ECN mark rate after first full flight is ACKed: */ + bbr->loss_round_delivered = tp->delivered + 1; + bbr->loss_round_start = 0; + bbr->undo_bw_lo = 0; + bbr->undo_inflight_lo = 0; + bbr->undo_inflight_hi = 0; + bbr->loss_events_in_round = 0; + bbr->startup_ecn_rounds = 0; + 
bbr_reset_congestion_signals(sk); + bbr->bw_lo = ~0U; + bbr->bw_hi[0] = 0; + bbr->bw_hi[1] = 0; + bbr->inflight_lo = ~0U; + bbr->inflight_hi = ~0U; + bbr_reset_full_bw(sk); + bbr->bw_probe_up_cnt = ~0U; + bbr->bw_probe_up_acks = 0; + bbr->bw_probe_up_rounds = 0; + bbr->probe_wait_us = 0; + bbr->stopped_risky_probe = 0; + bbr->ack_phase = BBR_ACKS_INIT; + bbr->rounds_since_probe = 0; + bbr->bw_probe_samples = 0; + bbr->prev_probe_too_high = 0; + bbr->ecn_eligible = 0; + bbr->ecn_alpha = bbr_param(sk, ecn_alpha_init); + bbr->alpha_last_delivered = 0; + bbr->alpha_last_delivered_ce = 0; + bbr->plb.pause_until = 0; + + tp->fast_ack_mode = bbr_fast_ack_mode ? 1 : 0; + + if (bbr_can_use_ecn(sk)) + tp->ecn_flags |= TCP_ECN_ECT_PERMANENT; +} + +/* BBR marks the current round trip as a loss round. */ +static void bbr_note_loss(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + /* Capture "current" data over the full round trip of loss, to + * have a better chance of observing the full capacity of the path. + */ + if (!bbr->loss_in_round) /* first loss in this round trip? */ + bbr->loss_round_delivered = tp->delivered; /* set round trip */ + bbr->loss_in_round = 1; + bbr->loss_in_cycle = 1; } -/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */ +/* Core TCP stack informs us that the given skb was just marked lost. */ +__bpf_kfunc static void bbr_skb_marked_lost(struct sock *sk, + const struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + struct rate_sample rs = {}; + + bbr_note_loss(sk); + + if (!bbr->bw_probe_samples) + return; /* not an skb sent while probing for bandwidth */ + if (unlikely(!scb->tx.delivered_mstamp)) + return; /* skb was SACKed, reneged, marked lost; ignore it */ + /* We are probing for bandwidth. Construct a rate sample that + * estimates what happened in the flight leading up to this lost skb, + * then see if the loss rate went too high, and if so at which packet. + */ + rs.tx_in_flight = scb->tx.in_flight; + rs.lost = tp->lost - scb->tx.lost; + rs.is_app_limited = scb->tx.is_app_limited; + if (bbr_is_inflight_too_high(sk, &rs)) { + rs.tx_in_flight = bbr_inflight_hi_from_lost_skb(sk, &rs, skb); + bbr_handle_inflight_too_high(sk, &rs); + } +} + +static void bbr_run_loss_probe_recovery(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + struct rate_sample rs = {0}; + + bbr_note_loss(sk); + + if (!bbr->bw_probe_samples) + return; /* not sent while probing for bandwidth */ + /* We are probing for bandwidth. Construct a rate sample that + * estimates what happened in the flight leading up to this + * loss, then see if the loss rate went too high. + */ + rs.lost = 1; /* TLP probe repaired loss of a single segment */ + rs.tx_in_flight = bbr->inflight_latest + rs.lost; + rs.is_app_limited = tp->tlp_orig_data_app_limited; + if (bbr_is_inflight_too_high(sk, &rs)) + bbr_handle_inflight_too_high(sk, &rs); +} + +/* Revert short-term model if current loss recovery event was spurious. */ +__bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr_reset_full_bw(sk); /* spurious slow-down; reset full bw detector */ + bbr->loss_in_round = 0; + + /* Revert to cwnd and other state saved before loss episode. 
*/ + bbr->bw_lo = max(bbr->bw_lo, bbr->undo_bw_lo); + bbr->inflight_lo = max(bbr->inflight_lo, bbr->undo_inflight_lo); + bbr->inflight_hi = max(bbr->inflight_hi, bbr->undo_inflight_hi); + bbr->try_fast_path = 0; /* take slow path to set proper cwnd, pacing */ + return bbr->prior_cwnd; +} + +/* Entering loss recovery, so save state for when we undo recovery. */ __bpf_kfunc static u32 bbr_ssthresh(struct sock *sk) { + struct bbr *bbr = inet_csk_ca(sk); + bbr_save_cwnd(sk); + /* For undo, save state that adapts based on loss signal. */ + bbr->undo_bw_lo = bbr->bw_lo; + bbr->undo_inflight_lo = bbr->inflight_lo; + bbr->undo_inflight_hi = bbr->inflight_hi; return tcp_sk(sk)->snd_ssthresh; } +static enum tcp_bbr_phase bbr_get_phase(struct bbr *bbr) +{ + switch (bbr->mode) { + case BBR_STARTUP: + return BBR_PHASE_STARTUP; + case BBR_DRAIN: + return BBR_PHASE_DRAIN; + case BBR_PROBE_BW: + break; + case BBR_PROBE_RTT: + return BBR_PHASE_PROBE_RTT; + default: + return BBR_PHASE_INVALID; + } + switch (bbr->cycle_idx) { + case BBR_BW_PROBE_UP: + return BBR_PHASE_PROBE_BW_UP; + case BBR_BW_PROBE_DOWN: + return BBR_PHASE_PROBE_BW_DOWN; + case BBR_BW_PROBE_CRUISE: + return BBR_PHASE_PROBE_BW_CRUISE; + case BBR_BW_PROBE_REFILL: + return BBR_PHASE_PROBE_BW_REFILL; + default: + return BBR_PHASE_INVALID; + } +} + static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr, - union tcp_cc_info *info) + union tcp_cc_info *info) { if (ext & (1 << (INET_DIAG_BBRINFO - 1)) || ext & (1 << (INET_DIAG_VEGASINFO - 1))) { - struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - u64 bw = bbr_bw(sk); - - bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE; - memset(&info->bbr, 0, sizeof(info->bbr)); - info->bbr.bbr_bw_lo = (u32)bw; - info->bbr.bbr_bw_hi = (u32)(bw >> 32); - info->bbr.bbr_min_rtt = bbr->min_rtt_us; - info->bbr.bbr_pacing_gain = bbr->pacing_gain; - info->bbr.bbr_cwnd_gain = bbr->cwnd_gain; + u64 bw = bbr_bw_bytes_per_sec(sk, bbr_bw(sk)); + u64 bw_hi = bbr_bw_bytes_per_sec(sk, bbr_max_bw(sk)); + u64 bw_lo = bbr->bw_lo == ~0U ? 
+ ~0ULL : bbr_bw_bytes_per_sec(sk, bbr->bw_lo); + struct tcp_bbr_info *bbr_info = &info->bbr; + + memset(bbr_info, 0, sizeof(*bbr_info)); + bbr_info->bbr_bw_lo = (u32)bw; + bbr_info->bbr_bw_hi = (u32)(bw >> 32); + bbr_info->bbr_min_rtt = bbr->min_rtt_us; + bbr_info->bbr_pacing_gain = bbr->pacing_gain; + bbr_info->bbr_cwnd_gain = bbr->cwnd_gain; + bbr_info->bbr_bw_hi_lsb = (u32)bw_hi; + bbr_info->bbr_bw_hi_msb = (u32)(bw_hi >> 32); + bbr_info->bbr_bw_lo_lsb = (u32)bw_lo; + bbr_info->bbr_bw_lo_msb = (u32)(bw_lo >> 32); + bbr_info->bbr_mode = bbr->mode; + bbr_info->bbr_phase = (__u8)bbr_get_phase(bbr); + bbr_info->bbr_version = (__u8)BBR_VERSION; + bbr_info->bbr_inflight_lo = bbr->inflight_lo; + bbr_info->bbr_inflight_hi = bbr->inflight_hi; + bbr_info->bbr_extra_acked = bbr_extra_acked(sk); *attr = INET_DIAG_BBRINFO; - return sizeof(info->bbr); + return sizeof(*bbr_info); } return 0; } __bpf_kfunc static void bbr_set_state(struct sock *sk, u8 new_state) { + struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); if (new_state == TCP_CA_Loss) { - struct rate_sample rs = { .losses = 1 }; bbr->prev_ca_state = TCP_CA_Loss; - bbr->full_bw = 0; - bbr->round_start = 1; /* treat RTO like end of a round */ - bbr_lt_bw_sampling(sk, &rs); + tcp_plb_update_state_upon_rto(sk, &bbr->plb); + /* The tcp_write_timeout() call to sk_rethink_txhash() likely + * repathed this flow, so re-learn the min network RTT on the + * new path: + */ + bbr_reset_full_bw(sk); + if (!bbr_is_probing_bandwidth(sk) && bbr->inflight_lo == ~0U) { + /* bbr_adapt_lower_bounds() needs cwnd before + * we suffered an RTO, to update inflight_lo: + */ + bbr->inflight_lo = + max(tcp_snd_cwnd(tp), bbr->prior_cwnd); + } + } else if (bbr->prev_ca_state == TCP_CA_Loss && + new_state != TCP_CA_Loss) { + bbr_exit_loss_recovery(sk); } } + static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { - .flags = TCP_CONG_NON_RESTRICTED, + .flags = TCP_CONG_NON_RESTRICTED | TCP_CONG_WANTS_CE_EVENTS, .name = "bbr", .owner = THIS_MODULE, .init = bbr_init, .cong_control = bbr_main, .sndbuf_expand = bbr_sndbuf_expand, + .skb_marked_lost = bbr_skb_marked_lost, .undo_cwnd = bbr_undo_cwnd, .cwnd_event = bbr_cwnd_event, .ssthresh = bbr_ssthresh, - .min_tso_segs = bbr_min_tso_segs, + .tso_segs = bbr_tso_segs, .get_info = bbr_get_info, .set_state = bbr_set_state, }; @@ -1161,10 +2361,11 @@ BTF_SET8_START(tcp_bbr_check_kfunc_ids) BTF_ID_FLAGS(func, bbr_init) BTF_ID_FLAGS(func, bbr_main) BTF_ID_FLAGS(func, bbr_sndbuf_expand) +BTF_ID_FLAGS(func, bbr_skb_marked_lost) BTF_ID_FLAGS(func, bbr_undo_cwnd) BTF_ID_FLAGS(func, bbr_cwnd_event) BTF_ID_FLAGS(func, bbr_ssthresh) -BTF_ID_FLAGS(func, bbr_min_tso_segs) +BTF_ID_FLAGS(func, bbr_tso_segs) BTF_ID_FLAGS(func, bbr_set_state) #endif #endif @@ -1199,5 +2400,12 @@ MODULE_AUTHOR("Van Jacobson "); MODULE_AUTHOR("Neal Cardwell "); MODULE_AUTHOR("Yuchung Cheng "); MODULE_AUTHOR("Soheil Hassas Yeganeh "); +MODULE_AUTHOR("Priyaranjan Jha "); +MODULE_AUTHOR("Yousuk Seung "); +MODULE_AUTHOR("Kevin Yang "); +MODULE_AUTHOR("Arjun Roy "); +MODULE_AUTHOR("David Morley "); + MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)"); +MODULE_VERSION(__stringify(BBR_VERSION)); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 1b34050a7538..66d40449b3f4 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -241,6 +241,7 @@ void tcp_init_congestion_control(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); tcp_sk(sk)->prior_ssthresh = 0; + 
tcp_sk(sk)->fast_ack_mode = 0; if (icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); if (tcp_ca_needs_ecn(sk)) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index df7b13f0e5e0..8415aa41524e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -364,7 +364,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) tcp_enter_quickack_mode(sk, 2); break; case INET_ECN_CE: - if (tcp_ca_needs_ecn(sk)) + if (tcp_ca_wants_ce_events(sk)) tcp_ca_event(sk, CA_EVENT_ECN_IS_CE); if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { @@ -375,7 +375,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) tp->ecn_flags |= TCP_ECN_SEEN; break; default: - if (tcp_ca_needs_ecn(sk)) + if (tcp_ca_wants_ce_events(sk)) tcp_ca_event(sk, CA_EVENT_ECN_NO_CE); tp->ecn_flags |= TCP_ECN_SEEN; break; @@ -1112,7 +1112,12 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) */ static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb) { + struct sock *sk = (struct sock *)tp; + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; + tp->lost += tcp_skb_pcount(skb); + if (ca_ops->skb_marked_lost) + ca_ops->skb_marked_lost(sk, skb); } void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb) @@ -1493,6 +1498,17 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount); tcp_skb_pcount_add(skb, -pcount); + /* Adjust tx.in_flight as pcount is shifted from skb to prev. */ + if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount, + "prev in_flight: %u skb in_flight: %u pcount: %u", + TCP_SKB_CB(prev)->tx.in_flight, + TCP_SKB_CB(skb)->tx.in_flight, + pcount)) + TCP_SKB_CB(skb)->tx.in_flight = 0; + else + TCP_SKB_CB(skb)->tx.in_flight -= pcount; + TCP_SKB_CB(prev)->tx.in_flight += pcount; + /* When we're adding to gso_segs == 1, gso_size will be zero, * in theory this shouldn't be necessary but as long as DSACK * code can come after this skb later on it's better to keep @@ -3761,7 +3777,8 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) /* This routine deals with acks during a TLP episode and ends an episode by * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack */ -static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) +static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag, + struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); @@ -3778,6 +3795,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) /* ACK advances: there was a loss, so reduce cwnd. Reset * tlp_high_seq in tcp_init_cwnd_reduction() */ + tcp_ca_event(sk, CA_EVENT_TLP_RECOVERY); tcp_init_cwnd_reduction(sk); tcp_set_ca_state(sk, TCP_CA_CWR); tcp_end_cwnd_reduction(sk); @@ -3788,6 +3806,11 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) FLAG_NOT_DUP | FLAG_DATA_SACKED))) { /* Pure dupack: original and TLP probe arrived; no loss */ tp->tlp_high_seq = 0; + } else { + /* This ACK matches a TLP retransmit. We cannot yet tell if + * this ACK is for the original or the TLP retransmit. + */ + rs->is_acking_tlp_retrans_seq = 1; } } @@ -3896,6 +3919,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una; rs.prior_in_flight = tcp_packets_in_flight(tp); + tcp_rate_check_app_limited(sk); /* ts_recent update must be made after we are sure that the packet * is in window. 
@@ -3970,7 +3994,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_rack_update_reo_wnd(sk, &rs); if (tp->tlp_high_seq) - tcp_process_tlp_ack(sk, ack, flag); + tcp_process_tlp_ack(sk, ack, flag, &rs); if (tcp_ack_is_dubious(sk, flag)) { if (!(flag & (FLAG_SND_UNA_ADVANCED | @@ -3994,6 +4018,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) delivered = tcp_newly_delivered(sk, delivered, flag); lost = tp->lost - lost; /* freshly marked lost */ rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); + rs.is_ece = !!(flag & FLAG_ECE); tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); tcp_xmit_recovery(sk, rexmit); @@ -4013,7 +4038,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_ack_probe(sk); if (tp->tlp_high_seq) - tcp_process_tlp_ack(sk, ack, flag); + tcp_process_tlp_ack(sk, ack, flag, &rs); return 1; old_ack: @@ -5664,13 +5689,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) /* More than one full frame received... */ if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && + (tp->fast_ack_mode == 1 || /* ... and right edge of window advances far enough. * (tcp_recvmsg() will send ACK otherwise). * If application uses SO_RCVLOWAT, we want send ack now if * we have not received enough bytes to satisfy the condition. */ - (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || - __tcp_select_window(sk) >= tp->rcv_wnd)) || + (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || + __tcp_select_window(sk) >= tp->rcv_wnd))) || /* We ACK each frame or... */ tcp_in_quickack_mode(sk) || /* Protocol state mandates a one-time immediate ACK */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0ecc7311dc6c..82622782486a 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -460,6 +460,8 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); bool ca_got_dst = false; + tcp_set_ecn_low_from_dst(sk, dst); + if (ca_key != TCP_CA_UNSPEC) { const struct tcp_congestion_ops *ca; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e3167ad96567..08fe7a626be1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -332,10 +332,9 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 || tcp_ca_needs_ecn(sk) || bpf_needs_ecn; + const struct dst_entry *dst = __sk_dst_get(sk); if (!use_ecn) { - const struct dst_entry *dst = __sk_dst_get(sk); - if (dst && dst_feature(dst, RTAX_FEATURE_ECN)) use_ecn = true; } @@ -347,6 +346,9 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) tp->ecn_flags = TCP_ECN_OK; if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) INET_ECN_xmit(sk); + + if (dst) + tcp_set_ecn_low_from_dst(sk, dst); } } @@ -384,7 +386,8 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, th->cwr = 1; skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; } - } else if (!tcp_ca_needs_ecn(sk)) { + } else if (!(tp->ecn_flags & TCP_ECN_ECT_PERMANENT) && + !tcp_ca_needs_ecn(sk)) { /* ACK or retransmitted segment: clear ECT|CE */ INET_ECN_dontxmit(sk); } @@ -1593,7 +1596,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; - int old_factor; + int old_factor, inflight_prev; long limit; int nlen; 
u8 flags; @@ -1668,6 +1671,30 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, if (diff) tcp_adjust_pcount(sk, skb, diff); + + inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor; + if (inflight_prev < 0) { + WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious( + old_factor, + TCP_SKB_CB(skb)->sacked, + TCP_SKB_CB(skb)->tx.in_flight), + "inconsistent: tx.in_flight: %u " + "old_factor: %d mss: %u sacked: %u " + "1st pcount: %d 2nd pcount: %d " + "1st len: %u 2nd len: %u ", + TCP_SKB_CB(skb)->tx.in_flight, old_factor, + mss_now, TCP_SKB_CB(skb)->sacked, + tcp_skb_pcount(skb), tcp_skb_pcount(buff), + skb->len, buff->len); + inflight_prev = 0; + } + /* Set 1st tx.in_flight as if 1st were sent by itself: */ + TCP_SKB_CB(skb)->tx.in_flight = inflight_prev + + tcp_skb_pcount(skb); + /* Set 2nd tx.in_flight with new 1st and 2nd pcounts: */ + TCP_SKB_CB(buff)->tx.in_flight = inflight_prev + + tcp_skb_pcount(skb) + + tcp_skb_pcount(buff); } /* Link BUFF into the send queue. */ @@ -2025,13 +2052,12 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; - u32 min_tso, tso_segs; - - min_tso = ca_ops->min_tso_segs ? - ca_ops->min_tso_segs(sk) : - READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); + u32 tso_segs; - tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); + tso_segs = ca_ops->tso_segs ? + ca_ops->tso_segs(sk, mss_now) : + tcp_tso_autosize(sk, mss_now, + sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); return min_t(u32, tso_segs, sk->sk_gso_max_segs); } @@ -2731,6 +2757,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true); list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); tcp_init_tso_segs(skb, mss_now); + tcp_set_tx_in_flight(sk, skb); goto repair; /* Skip network transmission */ } @@ -2944,6 +2971,7 @@ void tcp_send_loss_probe(struct sock *sk) if (WARN_ON(!skb || !tcp_skb_pcount(skb))) goto rearm_timer; + tp->tlp_orig_data_app_limited = TCP_SKB_CB(skb)->tx.is_app_limited; if (__tcp_retransmit_skb(sk, skb, 1)) goto rearm_timer; diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index a8f6d9d06f2e..8737f2134648 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -34,6 +34,24 @@ * ready to send in the write queue. */ +void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 in_flight; + + /* Check, sanitize, and record packets in flight after skb was sent. */ + in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb); + if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX, + "insane in_flight %u cc %s mss %u " + "cwnd %u pif %u %u %u %u\n", + in_flight, inet_csk(sk)->icsk_ca_ops->name, + tp->mss_cache, tp->snd_cwnd, + tp->packets_out, tp->retrans_out, + tp->sacked_out, tp->lost_out)) + in_flight = TCPCB_IN_FLIGHT_MAX; + TCP_SKB_CB(skb)->tx.in_flight = in_flight; +} + /* Snapshot the current delivery information in the skb, to generate * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). */ @@ -66,7 +84,9 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; TCP_SKB_CB(skb)->tx.delivered = tp->delivered; TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce; + TCP_SKB_CB(skb)->tx.lost = tp->lost; TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 
1 : 0; + tcp_set_tx_in_flight(sk, skb); } /* When an skb is sacked or acked, we fill in the rate sample with the (prior) @@ -91,18 +111,21 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, if (!rs->prior_delivered || tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp, scb->end_seq, rs->last_end_seq)) { + rs->prior_lost = scb->tx.lost; rs->prior_delivered_ce = scb->tx.delivered_ce; rs->prior_delivered = scb->tx.delivered; rs->prior_mstamp = scb->tx.delivered_mstamp; rs->is_app_limited = scb->tx.is_app_limited; rs->is_retrans = scb->sacked & TCPCB_RETRANS; + rs->tx_in_flight = scb->tx.in_flight; rs->last_end_seq = scb->end_seq; /* Record send time of most recently ACKed packet: */ tp->first_tx_mstamp = tx_tstamp; /* Find the duration of the "send phase" of this window: */ - rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp, - scb->tx.first_tx_mstamp); + rs->interval_us = tcp_stamp32_us_delta( + tp->first_tx_mstamp, + scb->tx.first_tx_mstamp); } /* Mark off the skb delivered once it's sacked to avoid being @@ -144,6 +167,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, return; } rs->delivered = tp->delivered - rs->prior_delivered; + rs->lost = tp->lost - rs->prior_lost; rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce; /* delivered_ce occupies less than 32 bits in the skb control block */ @@ -155,7 +179,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, * longer phase. */ snd_us = rs->interval_us; /* send phase */ - ack_us = tcp_stamp_us_delta(tp->tcp_mstamp, + ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp, rs->prior_mstamp); /* ack phase */ rs->interval_us = max(snd_us, ack_us); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index d1ad20ce1c8c..ef74f33c7905 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -678,6 +678,7 @@ void tcp_write_timer_handler(struct sock *sk) return; } + tcp_rate_check_app_limited(sk); tcp_mstamp_refresh(tcp_sk(sk)); event = icsk->icsk_pending; -- 2.45.0 From 82b79c17bf8e38cc761840e5f08f64fcb87bf1e7 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Wed, 10 Apr 2024 17:59:18 +0200 Subject: [PATCH 4/9] block Signed-off-by: Peter Jung --- block/bfq-iosched.c | 120 ++++++++++++++++++++++++++++++++++++-------- block/bfq-iosched.h | 16 +++++- block/mq-deadline.c | 114 +++++++++++++++++++++++++++++++++-------- 3 files changed, 205 insertions(+), 45 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 3cce6de464a7..9bd57baa4b0b 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -467,6 +467,21 @@ static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q) return icq; } +static struct bfq_io_cq *bfq_bic_try_lookup(struct request_queue *q) +{ + if (!current->io_context) + return NULL; + if (spin_trylock_irq(&q->queue_lock)) { + struct bfq_io_cq *icq; + + icq = icq_to_bic(ioc_lookup_icq(q)); + spin_unlock_irq(&q->queue_lock); + return icq; + } + + return NULL; +} + /* * Scheduler run of queue, if there are requests pending and no one in the * driver that will restart queueing. @@ -2454,10 +2469,21 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio, * returned by bfq_bic_lookup does not go away before * bfqd->lock is taken. */ - struct bfq_io_cq *bic = bfq_bic_lookup(q); + struct bfq_io_cq *bic = bfq_bic_try_lookup(q); bool ret; - spin_lock_irq(&bfqd->lock); + /* + * bio merging is called for every bio queued, and it's very easy + * to run into contention because of that. If we fail getting + * the dd lock, just skip this merge attempt. 
For related IO, the + * plug will be the successful merging point. If we get here, we + * already failed doing the obvious merge. Chances of actually + * getting a merge off this path is a lot slimmer, so skipping an + * occassional lookup that will most likely not succeed anyway should + * not be a problem. + */ + if (!spin_trylock_irq(&bfqd->lock)) + return false; if (bic) { /* @@ -5148,6 +5174,10 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx) { struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; + if (!list_empty_careful(&bfqd->at_head) || + !list_empty_careful(&bfqd->at_tail)) + return true; + /* * Avoiding lock: a race on bfqd->queued should cause at * most a call to dispatch for nothing @@ -5297,15 +5327,61 @@ static inline void bfq_update_dispatch_stats(struct request_queue *q, bool idle_timer_disabled) {} #endif /* CONFIG_BFQ_CGROUP_DEBUG */ +static void bfq_insert_request(struct request_queue *q, struct request *rq, + blk_insert_t flags, struct list_head *free); + +static void __bfq_do_insert(struct request_queue *q, blk_insert_t flags, + struct list_head *list, struct list_head *free) +{ + while (!list_empty(list)) { + struct request *rq; + + rq = list_first_entry(list, struct request, queuelist); + list_del_init(&rq->queuelist); + bfq_insert_request(q, rq, flags, free); + } +} + +static void bfq_do_insert(struct request_queue *q, struct list_head *free) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + LIST_HEAD(at_head); + LIST_HEAD(at_tail); + + spin_lock(&bfqd->insert_lock); + list_splice_init(&bfqd->at_head, &at_head); + list_splice_init(&bfqd->at_tail, &at_tail); + spin_unlock(&bfqd->insert_lock); + + __bfq_do_insert(q, BLK_MQ_INSERT_AT_HEAD, &at_head, free); + __bfq_do_insert(q, 0, &at_tail, free); +} + static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) { - struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; + struct request_queue *q = hctx->queue; + struct bfq_data *bfqd = q->elevator->elevator_data; struct request *rq; struct bfq_queue *in_serv_queue; bool waiting_rq, idle_timer_disabled = false; + LIST_HEAD(free); + + /* + * If someone else is already dispatching, skip this one. This will + * defer the next dispatch event to when something completes, and could + * potentially lower the queue depth for contended cases. + * + * See the logic in blk_mq_do_dispatch_sched(), which loops and + * retries if nothing is dispatched. + */ + if (test_bit(BFQ_DISPATCHING, &bfqd->run_state) || + test_and_set_bit_lock(BFQ_DISPATCHING, &bfqd->run_state)) + return NULL; spin_lock_irq(&bfqd->lock); + bfq_do_insert(hctx->queue, &free); + in_serv_queue = bfqd->in_service_queue; waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue); @@ -5315,7 +5391,9 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); } + clear_bit_unlock(BFQ_DISPATCHING, &bfqd->run_state); spin_unlock_irq(&bfqd->lock); + blk_mq_free_requests(&free); bfq_update_dispatch_stats(hctx->queue, rq, idle_timer_disabled ? 
in_serv_queue : NULL, idle_timer_disabled); @@ -6236,27 +6314,21 @@ static inline void bfq_update_insert_stats(struct request_queue *q, static struct bfq_queue *bfq_init_rq(struct request *rq); -static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, - blk_insert_t flags) +static void bfq_insert_request(struct request_queue *q, struct request *rq, + blk_insert_t flags, struct list_head *free) { - struct request_queue *q = hctx->queue; struct bfq_data *bfqd = q->elevator->elevator_data; struct bfq_queue *bfqq; bool idle_timer_disabled = false; blk_opf_t cmd_flags; - LIST_HEAD(free); #ifdef CONFIG_BFQ_GROUP_IOSCHED if (!cgroup_subsys_on_dfl(io_cgrp_subsys) && rq->bio) bfqg_stats_update_legacy_io(q, rq); #endif - spin_lock_irq(&bfqd->lock); bfqq = bfq_init_rq(rq); - if (blk_mq_sched_try_insert_merge(q, rq, &free)) { - spin_unlock_irq(&bfqd->lock); - blk_mq_free_requests(&free); + if (blk_mq_sched_try_insert_merge(q, rq, free)) return; - } trace_block_rq_insert(rq); @@ -6286,8 +6358,6 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, * merge). */ cmd_flags = rq->cmd_flags; - spin_unlock_irq(&bfqd->lock); - bfq_update_insert_stats(q, bfqq, idle_timer_disabled, cmd_flags); } @@ -6296,13 +6366,15 @@ static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx, struct list_head *list, blk_insert_t flags) { - while (!list_empty(list)) { - struct request *rq; + struct request_queue *q = hctx->queue; + struct bfq_data *bfqd = q->elevator->elevator_data; - rq = list_first_entry(list, struct request, queuelist); - list_del_init(&rq->queuelist); - bfq_insert_request(hctx, rq, flags); - } + spin_lock_irq(&bfqd->insert_lock); + if (flags & BLK_MQ_INSERT_AT_HEAD) + list_splice_init(list, &bfqd->at_head); + else + list_splice_init(list, &bfqd->at_tail); + spin_unlock_irq(&bfqd->insert_lock); } static void bfq_update_hw_tag(struct bfq_data *bfqd) @@ -7211,6 +7283,12 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) q->elevator = eq; spin_unlock_irq(&q->queue_lock); + spin_lock_init(&bfqd->lock); + spin_lock_init(&bfqd->insert_lock); + + INIT_LIST_HEAD(&bfqd->at_head); + INIT_LIST_HEAD(&bfqd->at_tail); + /* * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues. * Grab a permanent reference to it, so that the normal code flow @@ -7329,8 +7407,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) /* see comments on the definition of next field inside bfq_data */ bfqd->actuator_load_threshold = 4; - spin_lock_init(&bfqd->lock); - /* * The invocation of the next bfq_create_group_hierarchy * function is the head of a chain of function calls diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 467e8cfc41a2..f44f5d4ec2f4 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -504,12 +504,26 @@ struct bfq_io_cq { unsigned int requests; /* Number of requests this process has in flight */ }; +enum { + BFQ_DISPATCHING = 0, +}; + /** * struct bfq_data - per-device data structure. * * All the fields are protected by @lock. 
*/ struct bfq_data { + struct { + spinlock_t lock; + spinlock_t insert_lock; + } ____cacheline_aligned_in_smp; + + unsigned long run_state; + + struct list_head at_head; + struct list_head at_tail; + /* device request queue */ struct request_queue *queue; /* dispatch queue */ @@ -795,8 +809,6 @@ struct bfq_data { /* fallback dummy bfqq for extreme OOM conditions */ struct bfq_queue oom_bfqq; - spinlock_t lock; - /* * bic associated with the task issuing current bio for * merging. This and the next field are used as a support to diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 02a916ba62ee..8bf621316a9e 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -79,10 +79,24 @@ struct dd_per_prio { struct io_stats_per_prio stats; }; +enum { + DD_DISPATCHING = 0, +}; + struct deadline_data { /* * run time data */ + struct { + spinlock_t lock; + spinlock_t insert_lock; + spinlock_t zone_lock; + } ____cacheline_aligned_in_smp; + + unsigned long run_state; + + struct list_head at_head; + struct list_head at_tail; struct dd_per_prio per_prio[DD_PRIO_COUNT]; @@ -100,9 +114,6 @@ struct deadline_data { int front_merges; u32 async_depth; int prio_aging_expire; - - spinlock_t lock; - spinlock_t zone_lock; }; /* Maps an I/O priority class to a deadline scheduler priority. */ @@ -113,6 +124,9 @@ static const enum dd_prio ioprio_class_to_prio[] = { [IOPRIO_CLASS_IDLE] = DD_IDLE_PRIO, }; +static void dd_insert_request(struct request_queue *q, struct request *rq, + blk_insert_t flags, struct list_head *free); + static inline struct rb_root * deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq) { @@ -585,6 +599,33 @@ static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd, return NULL; } +static void __dd_do_insert(struct request_queue *q, blk_insert_t flags, + struct list_head *list, struct list_head *free) +{ + while (!list_empty(list)) { + struct request *rq; + + rq = list_first_entry(list, struct request, queuelist); + list_del_init(&rq->queuelist); + dd_insert_request(q, rq, flags, free); + } +} + +static void dd_do_insert(struct request_queue *q, struct list_head *free) +{ + struct deadline_data *dd = q->elevator->elevator_data; + LIST_HEAD(at_head); + LIST_HEAD(at_tail); + + spin_lock(&dd->insert_lock); + list_splice_init(&dd->at_head, &at_head); + list_splice_init(&dd->at_tail, &at_tail); + spin_unlock(&dd->insert_lock); + + __dd_do_insert(q, BLK_MQ_INSERT_AT_HEAD, &at_head, free); + __dd_do_insert(q, 0, &at_tail, free); +} + /* * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests(). * @@ -595,12 +636,27 @@ static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd, */ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) { - struct deadline_data *dd = hctx->queue->elevator->elevator_data; + struct request_queue *q = hctx->queue; + struct deadline_data *dd = q->elevator->elevator_data; const unsigned long now = jiffies; struct request *rq; enum dd_prio prio; + LIST_HEAD(free); + + /* + * If someone else is already dispatching, skip this one. This will + * defer the next dispatch event to when something completes, and could + * potentially lower the queue depth for contended cases. + * + * See the logic in blk_mq_do_dispatch_sched(), which loops and + * retries if nothing is dispatched. 
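+ *
+ * The unlocked test_bit() below runs first so that, when another CPU
+ * already holds DD_DISPATCHING, we skip the atomic test_and_set_bit_lock()
+ * and avoid bouncing its cacheline.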
+ */ + if (test_bit(DD_DISPATCHING, &dd->run_state) || + test_and_set_bit_lock(DD_DISPATCHING, &dd->run_state)) + return NULL; spin_lock(&dd->lock); + dd_do_insert(q, &free); rq = dd_dispatch_prio_aged_requests(dd, now); if (rq) goto unlock; @@ -616,8 +672,10 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) } unlock: + clear_bit_unlock(DD_DISPATCHING, &dd->run_state); spin_unlock(&dd->lock); + blk_mq_free_requests(&free); return rq; } @@ -705,6 +763,13 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e) eq->elevator_data = dd; + spin_lock_init(&dd->lock); + spin_lock_init(&dd->insert_lock); + spin_lock_init(&dd->zone_lock); + + INIT_LIST_HEAD(&dd->at_head); + INIT_LIST_HEAD(&dd->at_tail); + for (prio = 0; prio <= DD_PRIO_MAX; prio++) { struct dd_per_prio *per_prio = &dd->per_prio[prio]; @@ -721,8 +786,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e) dd->last_dir = DD_WRITE; dd->fifo_batch = fifo_batch; dd->prio_aging_expire = prio_aging_expire; - spin_lock_init(&dd->lock); - spin_lock_init(&dd->zone_lock); /* We dispatch from request queue wide instead of hw queue */ blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q); @@ -778,7 +841,19 @@ static bool dd_bio_merge(struct request_queue *q, struct bio *bio, struct request *free = NULL; bool ret; - spin_lock(&dd->lock); + /* + * bio merging is called for every bio queued, and it's very easy + * to run into contention because of that. If we fail getting + * the dd lock, just skip this merge attempt. For related IO, the + * plug will be the successful merging point. If we get here, we + * already failed doing the obvious merge. Chances of actually + * getting a merge off this path is a lot slimmer, so skipping an + * occassional lookup that will most likely not succeed anyway should + * not be a problem. + */ + if (!spin_trylock(&dd->lock)) + return false; + ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); spin_unlock(&dd->lock); @@ -791,10 +866,9 @@ static bool dd_bio_merge(struct request_queue *q, struct bio *bio, /* * add rq to rbtree and fifo */ -static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, +static void dd_insert_request(struct request_queue *q, struct request *rq, blk_insert_t flags, struct list_head *free) { - struct request_queue *q = hctx->queue; struct deadline_data *dd = q->elevator->elevator_data; const enum dd_data_dir data_dir = rq_data_dir(rq); u16 ioprio = req_get_ioprio(rq); @@ -866,19 +940,13 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, { struct request_queue *q = hctx->queue; struct deadline_data *dd = q->elevator->elevator_data; - LIST_HEAD(free); - spin_lock(&dd->lock); - while (!list_empty(list)) { - struct request *rq; - - rq = list_first_entry(list, struct request, queuelist); - list_del_init(&rq->queuelist); - dd_insert_request(hctx, rq, flags, &free); - } - spin_unlock(&dd->lock); - - blk_mq_free_requests(&free); + spin_lock(&dd->insert_lock); + if (flags & BLK_MQ_INSERT_AT_HEAD) + list_splice_init(list, &dd->at_head); + else + list_splice_init(list, &dd->at_tail); + spin_unlock(&dd->insert_lock); } /* Callback from inside blk_mq_rq_ctx_init(). 
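The mq-deadline hunks above (and the BFQ hunks earlier in this patch) all follow the same pattern: request insertion no longer takes the scheduler's main lock at all, it only splices the requests onto at_head/at_tail staging lists under a much cheaper insert_lock, and the staged requests are folded into the real rbtree/FIFO later, from the dispatch path, while a DISPATCHING bit guarantees only one CPU pays that cost at a time. The sketch below is a stand-alone user-space analogue of that scheme, not kernel code; every name in it is invented for illustration (the real code uses spinlocks, struct request lists and test_and_set_bit_lock()):

/*
 * Stand-alone user-space analogue of the insert/dispatch split above.
 * Build with: gcc -pthread sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct req {
	int id;
	struct req *next;
};

struct sched {
	pthread_mutex_t lock;		/* guards the "real" queue (expensive) */
	pthread_mutex_t insert_lock;	/* guards only the staging list (cheap) */
	atomic_bool dispatching;	/* analogue of the DD_DISPATCHING bit */
	struct req *staged;		/* analogue of dd->at_head/at_tail */
	struct req *sorted;		/* stand-in for the rbtree + FIFO */
};

/* Insert path: never touches the main lock, so submitters don't
 * serialize against an ongoing dispatch. */
static void sched_insert(struct sched *s, struct req *rq)
{
	pthread_mutex_lock(&s->insert_lock);
	rq->next = s->staged;
	s->staged = rq;
	pthread_mutex_unlock(&s->insert_lock);
}

/* Dispatch path: one caller at a time drains the staging list into the
 * real queue and then picks a request, all under the main lock. */
static struct req *sched_dispatch(struct sched *s)
{
	struct req *batch, *rq;
	bool idle = false;

	/* Someone else is already dispatching: let them do the work. */
	if (!atomic_compare_exchange_strong(&s->dispatching, &idle, true))
		return NULL;

	pthread_mutex_lock(&s->lock);

	/* Fold the staged requests in (the dd_do_insert() step). */
	pthread_mutex_lock(&s->insert_lock);
	batch = s->staged;
	s->staged = NULL;
	pthread_mutex_unlock(&s->insert_lock);
	while (batch) {
		rq = batch;
		batch = batch->next;
		rq->next = s->sorted;
		s->sorted = rq;
	}

	/* Pick whatever the scheduling policy would pick. */
	rq = s->sorted;
	if (rq)
		s->sorted = rq->next;

	pthread_mutex_unlock(&s->lock);
	atomic_store(&s->dispatching, false);
	return rq;
}

int main(void)
{
	struct sched s = { .staged = NULL, .sorted = NULL };
	struct req reqs[3];
	struct req *rq;

	pthread_mutex_init(&s.lock, NULL);
	pthread_mutex_init(&s.insert_lock, NULL);
	atomic_init(&s.dispatching, false);

	for (int i = 0; i < 3; i++) {
		reqs[i].id = i;
		sched_insert(&s, &reqs[i]);
	}
	while ((rq = sched_dispatch(&s)))
		printf("dispatched request %d\n", rq->id);
	return 0;
}

The spin_trylock() change in dd_bio_merge() is the same philosophy applied to merging: if the main lock is busy, the merge attempt is simply skipped instead of waited for, since the plug has normally already caught the profitable merges.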
*/ @@ -957,6 +1025,10 @@ static bool dd_has_work(struct blk_mq_hw_ctx *hctx) struct deadline_data *dd = hctx->queue->elevator->elevator_data; enum dd_prio prio; + if (!list_empty_careful(&dd->at_head) || + !list_empty_careful(&dd->at_tail)) + return true; + for (prio = 0; prio <= DD_PRIO_MAX; prio++) if (dd_has_work_for_prio(&dd->per_prio[prio])) return true; -- 2.45.0 From 557b640278bcac54de88fa14a1a507dd7f0cedc9 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Sat, 27 Apr 2024 20:12:59 +0200 Subject: [PATCH 5/9] cachy Signed-off-by: Peter Jung --- .../admin-guide/kernel-parameters.txt | 9 + Makefile | 162 +- arch/arm/Makefile | 56 +- arch/x86/Kconfig.cpu | 426 +- arch/x86/Makefile | 19 +- arch/x86/Makefile_32.cpu | 41 - arch/x86/include/asm/pci.h | 6 + arch/x86/include/asm/vermagic.h | 74 + arch/x86/pci/common.c | 7 +- block/bfq-iosched.c | 6 + block/elevator.c | 10 + drivers/ata/ahci.c | 23 +- drivers/cpufreq/Kconfig.x86 | 2 - drivers/extcon/Kconfig | 7 + drivers/extcon/Makefile | 1 + drivers/extcon/extcon-steamdeck.c | 180 + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 + drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 2 - .../gpu/drm/amd/amdgpu/atombios_encoders.c | 10 +- drivers/gpu/drm/amd/display/Kconfig | 6 + .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 35 +- .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 +- .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 6 +- .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 6 +- .../drm/amd/display/dc/link/link_validation.c | 11 + drivers/gpu/drm/amd/pm/amdgpu_pm.c | 3 + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 14 +- .../gpu/drm/drm_panel_orientation_quirks.c | 7 + drivers/hwmon/Kconfig | 11 + drivers/hwmon/Makefile | 1 + drivers/hwmon/steamdeck-hwmon.c | 294 + drivers/i2c/busses/Kconfig | 9 + drivers/i2c/busses/Makefile | 1 + drivers/i2c/busses/i2c-nct6775.c | 648 ++ drivers/i2c/busses/i2c-piix4.c | 4 +- drivers/input/evdev.c | 19 +- drivers/leds/Kconfig | 7 + drivers/leds/Makefile | 1 + drivers/leds/leds-steamdeck.c | 74 + drivers/md/dm-crypt.c | 5 + drivers/media/v4l2-core/Kconfig | 5 + drivers/media/v4l2-core/Makefile | 2 + drivers/media/v4l2-core/v4l2loopback.c | 3184 +++++++++ drivers/media/v4l2-core/v4l2loopback.h | 98 + .../media/v4l2-core/v4l2loopback_formats.h | 445 ++ drivers/mfd/Kconfig | 11 + drivers/mfd/Makefile | 2 + drivers/mfd/steamdeck.c | 147 + drivers/net/wireless/ath/ath11k/core.c | 88 + drivers/net/wireless/ath/ath11k/core.h | 1 + drivers/net/wireless/ath/ath11k/hw.c | 1 + drivers/net/wireless/ath/ath11k/hw.h | 1 + drivers/net/wireless/ath/ath11k/mac.c | 86 +- drivers/net/wireless/ath/ath11k/mhi.c | 1 + drivers/net/wireless/ath/ath11k/pci.c | 50 +- drivers/net/wireless/ath/ath11k/pcic.c | 11 + drivers/net/wireless/ath/ath11k/wmi.c | 27 + drivers/net/wireless/ath/ath11k/wmi.h | 87 + drivers/pci/controller/Makefile | 6 + drivers/pci/controller/intel-nvme-remap.c | 462 ++ drivers/pci/quirks.c | 109 +- drivers/platform/x86/Kconfig | 10 + drivers/platform/x86/Makefile | 1 + drivers/platform/x86/legion-laptop.c | 6089 +++++++++++++++++ include/linux/pagemap.h | 2 +- include/linux/user_namespace.h | 4 + init/Kconfig | 26 + kernel/Kconfig.hz | 24 + kernel/fork.c | 14 + kernel/sched/fair.c | 13 + kernel/sched/sched.h | 2 +- kernel/sysctl.c | 12 + kernel/user_namespace.c | 7 + mm/Kconfig | 2 +- mm/compaction.c | 4 + mm/huge_memory.c | 4 + mm/page-writeback.c | 8 + mm/page_alloc.c | 27 +- mm/swap.c | 5 + mm/vmpressure.c | 4 + mm/vmscan.c | 8 + sound/soc/amd/acp/acp-mach-common.c | 104 + 
sound/soc/amd/acp/acp-mach.h | 3 +- sound/soc/codecs/max98388.c | 24 +- 85 files changed, 13214 insertions(+), 223 deletions(-) create mode 100644 drivers/extcon/extcon-steamdeck.c create mode 100644 drivers/hwmon/steamdeck-hwmon.c create mode 100644 drivers/i2c/busses/i2c-nct6775.c create mode 100644 drivers/leds/leds-steamdeck.c create mode 100644 drivers/media/v4l2-core/v4l2loopback.c create mode 100644 drivers/media/v4l2-core/v4l2loopback.h create mode 100644 drivers/media/v4l2-core/v4l2loopback_formats.h create mode 100644 drivers/mfd/steamdeck.c create mode 100644 drivers/pci/controller/intel-nvme-remap.c create mode 100644 drivers/platform/x86/legion-laptop.c diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index dd62df26186f..b7be9c04180c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4395,6 +4395,15 @@ nomsi [MSI] If the PCI_MSI kernel config parameter is enabled, this kernel boot option can be used to disable the use of MSI interrupts system-wide. + pcie_acs_override = + [PCIE] Override missing PCIe ACS support for: + downstream + All downstream ports - full ACS capabilities + multfunction + All multifunction devices - multifunction ACS subset + id:nnnn:nnnn + Specfic device - full ACS capabilities + Specified as vid:did (vendor/device ID) in hex noioapicquirk [APIC] Disable all boot interrupt quirks. Safety option to keep boot IRQs enabled. This should never be necessary. diff --git a/Makefile b/Makefile index 2917a6914c03..16d523bc5081 100644 --- a/Makefile +++ b/Makefile @@ -808,9 +808,164 @@ endif # need-config KBUILD_CFLAGS += -fno-delete-null-pointer-checks +# This selects which ARM instruction set is used. +arch-$(CONFIG_CPU_32v7M) :=-march=armv7-m +arch-$(CONFIG_CPU_32v7) :=-march=armv7-a +arch-$(CONFIG_CPU_32v6) :=-march=armv6 +# Only override the compiler option if ARMv6. The ARMv6K extensions are +# always available in ARMv7 +ifeq ($(CONFIG_CPU_32v6),y) +arch-$(CONFIG_CPU_32v6K) :=-march=armv6k +endif +arch-$(CONFIG_CPU_32v5) :=-march=armv5te +arch-$(CONFIG_CPU_32v4T) :=-march=armv4t +arch-$(CONFIG_CPU_32v4) :=-march=armv4 +arch-$(CONFIG_CPU_32v3) :=-march=armv3m + +# Note that GCC does not numerically define an architecture version +# macro, but instead defines a whole series of macros which makes +# testing for a specific architecture or later rather impossible. +cpp-$(CONFIG_CPU_32v7M) :=-D__LINUX_ARM_ARCH__=7 +cpp-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 +cpp-$(CONFIG_CPU_32v6) :=-D__LINUX_ARM_ARCH__=6 +# Only override the compiler option if ARMv6. The ARMv6K extensions are +# always available in ARMv7 +ifeq ($(CONFIG_CPU_32v6),y) +cpp-$(CONFIG_CPU_32v6K) :=-D__LINUX_ARM_ARCH__=6 +endif +cpp-$(CONFIG_CPU_32v5) :=-D__LINUX_ARM_ARCH__=5 +cpp-$(CONFIG_CPU_32v4T) :=-D__LINUX_ARM_ARCH__=4 +cpp-$(CONFIG_CPU_32v4) :=-D__LINUX_ARM_ARCH__=4 +cpp-$(CONFIG_CPU_32v3) :=-D__LINUX_ARM_ARCH__=3 + +# This selects how we optimise for the processor. 
+tune-$(CONFIG_CPU_ARM7TDMI) :=-mtune=arm7tdmi +tune-$(CONFIG_CPU_ARM720T) :=-mtune=arm7tdmi +tune-$(CONFIG_CPU_ARM740T) :=-mtune=arm7tdmi +tune-$(CONFIG_CPU_ARM9TDMI) :=-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM940T) :=-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM946E) :=-mtune=arm9e +tune-$(CONFIG_CPU_ARM920T) :=-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM922T) :=-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM925T) :=-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM926T) :=-mtune=arm9tdmi +tune-$(CONFIG_CPU_FA526) :=-mtune=arm9tdmi +tune-$(CONFIG_CPU_SA110) :=-mtune=strongarm110 +tune-$(CONFIG_CPU_SA1100) :=-mtune=strongarm1100 +tune-$(CONFIG_CPU_XSCALE) :=-mtune=xscale +tune-$(CONFIG_CPU_XSC3) :=-mtune=xscale +tune-$(CONFIG_CPU_FEROCEON) :=-mtune=xscale +tune-$(CONFIG_CPU_V6) :=-mtune=arm1136j-s +tune-$(CONFIG_CPU_V6K) :=-mtune=arm1136j-s + +KBUILD_CPPFLAGS +=$(cpp-y) +KBUILD_CFLAGS +=$(arch-y) $(tune-y) +KBUILD_AFLAGS +=$(arch-y) $(tune-y) + +# This selects which x86 instruction set is used. +cflags-$(CONFIG_M486SX) += -march=i486 +cflags-$(CONFIG_M486) += -march=i486 +cflags-$(CONFIG_M586) += -march=i586 +cflags-$(CONFIG_M586TSC) += -march=i586 +cflags-$(CONFIG_M586MMX) += -march=pentium-mmx +cflags-$(CONFIG_M686) += -march=i686 +cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call tune,pentium2) +cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call tune,pentium3) +cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call tune,pentium3) +cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call tune,pentium4) +cflags-$(CONFIG_MK6) += -march=k6 +# Please note, that patches that add -march=athlon-xp and friends are pointless. +# They make zero difference whatsosever to performance at this time. +cflags-$(CONFIG_MK7) += -march=athlon +cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon) +cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align) +cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align) +cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) +cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) +cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align) +cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) +cflags-$(CONFIG_MVIAC7) += -march=i686 +cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) +cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \ +$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) + +# AMD Elan support +cflags-$(CONFIG_MELAN) += -march=i486 + +# Geode GX1 support +cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx +cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx) +# add at the end to overwrite eventual tuning options from earlier +# cpu entries +cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) + +# Bug fix for binutils: this option is required in order to keep +# binutils from generating NOPL instructions against our will. 
+ifneq ($(CONFIG_X86_P6_NOP),y) +cflags-y += $(call cc-option,-Wa$(comma)-mtune=generic32,) +endif + +# x86_64 instruction set +cflags64-$(CONFIG_MK8) += -march=k8 +cflags64-$(CONFIG_MPSC) += -march=nocona +cflags64-$(CONFIG_MK8SSE3) += -march=k8-sse3 +cflags64-$(CONFIG_MK10) += -march=amdfam10 +cflags64-$(CONFIG_MBARCELONA) += -march=barcelona +cflags64-$(CONFIG_MBOBCAT) += -march=btver1 +cflags64-$(CONFIG_MJAGUAR) += -march=btver2 +cflags64-$(CONFIG_MBULLDOZER) += -march=bdver1 +cflags64-$(CONFIG_MPILEDRIVER) += -march=bdver2 -mno-tbm +cflags64-$(CONFIG_MSTEAMROLLER) += -march=bdver3 -mno-tbm +cflags64-$(CONFIG_MEXCAVATOR) += -march=bdver4 -mno-tbm +cflags64-$(CONFIG_MZEN) += -march=znver1 +cflags64-$(CONFIG_MZEN2) += -march=znver2 +cflags64-$(CONFIG_MZEN3) += -march=znver3 +cflags64-$(CONFIG_MZEN4) += -march=znver4 +cflags64-$(CONFIG_MNATIVE_INTEL) += -march=native +cflags64-$(CONFIG_MNATIVE_AMD) += -march=native +cflags64-$(CONFIG_MATOM) += -march=bonnell +cflags64-$(CONFIG_MCORE2) += -march=core2 +cflags64-$(CONFIG_MNEHALEM) += -march=nehalem +cflags64-$(CONFIG_MWESTMERE) += -march=westmere +cflags64-$(CONFIG_MSILVERMONT) += -march=silvermont +cflags64-$(CONFIG_MGOLDMONT) += -march=goldmont +cflags64-$(CONFIG_MGOLDMONTPLUS) += -march=goldmont-plus +cflags64-$(CONFIG_MSANDYBRIDGE) += -march=sandybridge +cflags64-$(CONFIG_MIVYBRIDGE) += -march=ivybridge +cflags64-$(CONFIG_MHASWELL) += -march=haswell +cflags64-$(CONFIG_MBROADWELL) += -march=broadwell +cflags64-$(CONFIG_MSKYLAKE) += -march=skylake +cflags64-$(CONFIG_MSKYLAKEX) += -march=skylake-avx512 +cflags64-$(CONFIG_MCANNONLAKE) += -march=cannonlake +cflags64-$(CONFIG_MICELAKE) += -march=icelake-client +cflags64-$(CONFIG_MCASCADELAKE) += -march=cascadelake +cflags64-$(CONFIG_MCOOPERLAKE) += -march=cooperlake +cflags64-$(CONFIG_MTIGERLAKE) += -march=tigerlake +cflags64-$(CONFIG_MSAPPHIRERAPIDS) += -march=sapphirerapids +cflags64-$(CONFIG_MROCKETLAKE) += -march=rocketlake +cflags64-$(CONFIG_MALDERLAKE) += -march=alderlake +cflags64-$(CONFIG_MRAPTORLAKE) += -march=raptorlake +cflags64-$(CONFIG_MMETEORLAKE) += -march=meteorlake +cflags64-$(CONFIG_MEMERALDRAPIDS) += -march=emeraldrapids +cflags64-$(CONFIG_GENERIC_CPU2) += -march=x86-64-v2 +cflags64-$(CONFIG_GENERIC_CPU3) += -march=x86-64-v3 +cflags64-$(CONFIG_GENERIC_CPU4) += -march=x86-64-v4 +cflags64-$(CONFIG_GENERIC_CPU) += -mtune=generic +KBUILD_CFLAGS += $(cflags64-y) + +rustflags64-$(CONFIG_MK8) += -Ctarget-cpu=k8 +rustflags64-$(CONFIG_MPSC) += -Ctarget-cpu=nocona +rustflags64-$(CONFIG_MCORE2) += -Ctarget-cpu=core2 +rustflags64-$(CONFIG_MATOM) += -Ctarget-cpu=atom +rustflags64-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic +KBUILD_RUSTFLAGS += $(rustflags64-y) + ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE KBUILD_CFLAGS += -O2 KBUILD_RUSTFLAGS += -Copt-level=2 +else ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3 +KBUILD_CFLAGS += -O3 +KBUILD_RUSTFLAGS += -Copt-level=3 else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os KBUILD_RUSTFLAGS += -Copt-level=s @@ -990,15 +1145,18 @@ KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3) KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-option, -Wno-stringop-overflow) KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) +# disable GCC vectorization on trees +KBUILD_CFLAGS += $(call cc-option, -fno-tree-vectorize) + # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow # Make sure -fstack-check isn't enabled (like gentoo apparently did) 
KBUILD_CFLAGS += -fno-stack-check -# conserve stack if available +# conserve stack, ivopts and modulo-sched if available ifdef CONFIG_CC_IS_GCC -KBUILD_CFLAGS += -fconserve-stack +KBUILD_CFLAGS += -fconserve-stack -fivopts -fmodulo-sched endif # change __FILE__ to the relative path from the srctree diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 473280d5adce..c7596c898625 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -59,56 +59,6 @@ endif # KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra) -# This selects which instruction set is used. -arch-$(CONFIG_CPU_32v7M) :=-march=armv7-m -arch-$(CONFIG_CPU_32v7) :=-march=armv7-a -arch-$(CONFIG_CPU_32v6) :=-march=armv6 -# Only override the compiler option if ARMv6. The ARMv6K extensions are -# always available in ARMv7 -ifeq ($(CONFIG_CPU_32v6),y) -arch-$(CONFIG_CPU_32v6K) :=-march=armv6k -endif -arch-$(CONFIG_CPU_32v5) :=-march=armv5te -arch-$(CONFIG_CPU_32v4T) :=-march=armv4t -arch-$(CONFIG_CPU_32v4) :=-march=armv4 -arch-$(CONFIG_CPU_32v3) :=-march=armv3m - -# Note that GCC does not numerically define an architecture version -# macro, but instead defines a whole series of macros which makes -# testing for a specific architecture or later rather impossible. -cpp-$(CONFIG_CPU_32v7M) :=-D__LINUX_ARM_ARCH__=7 -cpp-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 -cpp-$(CONFIG_CPU_32v6) :=-D__LINUX_ARM_ARCH__=6 -# Only override the compiler option if ARMv6. The ARMv6K extensions are -# always available in ARMv7 -ifeq ($(CONFIG_CPU_32v6),y) -cpp-$(CONFIG_CPU_32v6K) :=-D__LINUX_ARM_ARCH__=6 -endif -cpp-$(CONFIG_CPU_32v5) :=-D__LINUX_ARM_ARCH__=5 -cpp-$(CONFIG_CPU_32v4T) :=-D__LINUX_ARM_ARCH__=4 -cpp-$(CONFIG_CPU_32v4) :=-D__LINUX_ARM_ARCH__=4 -cpp-$(CONFIG_CPU_32v3) :=-D__LINUX_ARM_ARCH__=3 - -# This selects how we optimise for the processor. 
-tune-$(CONFIG_CPU_ARM7TDMI) :=-mtune=arm7tdmi -tune-$(CONFIG_CPU_ARM720T) :=-mtune=arm7tdmi -tune-$(CONFIG_CPU_ARM740T) :=-mtune=arm7tdmi -tune-$(CONFIG_CPU_ARM9TDMI) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM940T) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM946E) :=-mtune=arm9e -tune-$(CONFIG_CPU_ARM920T) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM922T) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM925T) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM926T) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_FA526) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_SA110) :=-mtune=strongarm110 -tune-$(CONFIG_CPU_SA1100) :=-mtune=strongarm1100 -tune-$(CONFIG_CPU_XSCALE) :=-mtune=xscale -tune-$(CONFIG_CPU_XSC3) :=-mtune=xscale -tune-$(CONFIG_CPU_FEROCEON) :=-mtune=xscale -tune-$(CONFIG_CPU_V6) :=-mtune=arm1136j-s -tune-$(CONFIG_CPU_V6K) :=-mtune=arm1136j-s - ifeq ($(CONFIG_AEABI),y) CFLAGS_ABI :=-mabi=aapcs-linux -mfpu=vfp else @@ -140,9 +90,9 @@ AFLAGS_ISA :=$(CFLAGS_ISA) endif # Need -Uarm for gcc < 3.x -KBUILD_CPPFLAGS +=$(cpp-y) -KBUILD_CFLAGS +=$(CFLAGS_ABI) $(CFLAGS_ISA) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm -KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_ISA) -Wa,$(arch-y) $(tune-y) -include asm/unified.h -msoft-float + +KBUILD_CFLAGS +=$(CFLAGS_ABI) $(CFLAGS_ISA) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm +KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_ISA) -Wa,-include asm/unified.h -msoft-float CHECKFLAGS += -D__arm__ diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 2a7279d80460..b6a64a959e09 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -157,7 +157,7 @@ config MPENTIUM4 config MK6 - bool "K6/K6-II/K6-III" + bool "AMD K6/K6-II/K6-III" depends on X86_32 help Select this for an AMD K6-family processor. Enables use of @@ -165,7 +165,7 @@ config MK6 flags to GCC. config MK7 - bool "Athlon/Duron/K7" + bool "AMD Athlon/Duron/K7" depends on X86_32 help Select this for an AMD Athlon K7-family processor. Enables use of @@ -173,12 +173,106 @@ config MK7 flags to GCC. config MK8 - bool "Opteron/Athlon64/Hammer/K8" + bool "AMD Opteron/Athlon64/Hammer/K8" help Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables use of some extended instructions, and passes appropriate optimization flags to GCC. +config MK8SSE3 + bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3" + help + Select this for improved AMD Opteron or Athlon64 Hammer-family processors. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. + +config MK10 + bool "AMD 61xx/7x50/PhenomX3/X4/II/K10" + help + Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50, + Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. + +config MBARCELONA + bool "AMD Barcelona" + help + Select this for AMD Family 10h Barcelona processors. + + Enables -march=barcelona + +config MBOBCAT + bool "AMD Bobcat" + help + Select this for AMD Family 14h Bobcat processors. + + Enables -march=btver1 + +config MJAGUAR + bool "AMD Jaguar" + help + Select this for AMD Family 16h Jaguar processors. + + Enables -march=btver2 + +config MBULLDOZER + bool "AMD Bulldozer" + help + Select this for AMD Family 15h Bulldozer processors. + + Enables -march=bdver1 + +config MPILEDRIVER + bool "AMD Piledriver" + help + Select this for AMD Family 15h Piledriver processors. 
+ + Enables -march=bdver2 + +config MSTEAMROLLER + bool "AMD Steamroller" + help + Select this for AMD Family 15h Steamroller processors. + + Enables -march=bdver3 + +config MEXCAVATOR + bool "AMD Excavator" + help + Select this for AMD Family 15h Excavator processors. + + Enables -march=bdver4 + +config MZEN + bool "AMD Zen" + help + Select this for AMD Family 17h Zen processors. + + Enables -march=znver1 + +config MZEN2 + bool "AMD Zen 2" + help + Select this for AMD Family 17h Zen 2 processors. + + Enables -march=znver2 + +config MZEN3 + bool "AMD Zen 3" + depends on (CC_IS_GCC && GCC_VERSION >= 100300) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + help + Select this for AMD Family 19h Zen 3 processors. + + Enables -march=znver3 + +config MZEN4 + bool "AMD Zen 4" + depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 160000) + help + Select this for AMD Family 19h Zen 4 processors. + + Enables -march=znver4 + config MCRUSOE bool "Crusoe" depends on X86_32 @@ -270,7 +364,7 @@ config MPSC in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. config MCORE2 - bool "Core 2/newer Xeon" + bool "Intel Core 2" help Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and @@ -278,6 +372,8 @@ config MCORE2 family in /proc/cpuinfo. Newer ones have 6 and older ones 15 (not a typo) + Enables -march=core2 + config MATOM bool "Intel Atom" help @@ -287,6 +383,212 @@ config MATOM accordingly optimized code. Use a recent GCC with specific Atom support in order to fully benefit from selecting this option. +config MNEHALEM + bool "Intel Nehalem" + select X86_P6_NOP + help + + Select this for 1st Gen Core processors in the Nehalem family. + + Enables -march=nehalem + +config MWESTMERE + bool "Intel Westmere" + select X86_P6_NOP + help + + Select this for the Intel Westmere formerly Nehalem-C family. + + Enables -march=westmere + +config MSILVERMONT + bool "Intel Silvermont" + select X86_P6_NOP + help + + Select this for the Intel Silvermont platform. + + Enables -march=silvermont + +config MGOLDMONT + bool "Intel Goldmont" + select X86_P6_NOP + help + + Select this for the Intel Goldmont platform including Apollo Lake and Denverton. + + Enables -march=goldmont + +config MGOLDMONTPLUS + bool "Intel Goldmont Plus" + select X86_P6_NOP + help + + Select this for the Intel Goldmont Plus platform including Gemini Lake. + + Enables -march=goldmont-plus + +config MSANDYBRIDGE + bool "Intel Sandy Bridge" + select X86_P6_NOP + help + + Select this for 2nd Gen Core processors in the Sandy Bridge family. + + Enables -march=sandybridge + +config MIVYBRIDGE + bool "Intel Ivy Bridge" + select X86_P6_NOP + help + + Select this for 3rd Gen Core processors in the Ivy Bridge family. + + Enables -march=ivybridge + +config MHASWELL + bool "Intel Haswell" + select X86_P6_NOP + help + + Select this for 4th Gen Core processors in the Haswell family. + + Enables -march=haswell + +config MBROADWELL + bool "Intel Broadwell" + select X86_P6_NOP + help + + Select this for 5th Gen Core processors in the Broadwell family. + + Enables -march=broadwell + +config MSKYLAKE + bool "Intel Skylake" + select X86_P6_NOP + help + + Select this for 6th Gen Core processors in the Skylake family. + + Enables -march=skylake + +config MSKYLAKEX + bool "Intel Skylake X" + select X86_P6_NOP + help + + Select this for 6th Gen Core processors in the Skylake X family. 
+ + Enables -march=skylake-avx512 + +config MCANNONLAKE + bool "Intel Cannon Lake" + select X86_P6_NOP + help + + Select this for 8th Gen Core processors + + Enables -march=cannonlake + +config MICELAKE + bool "Intel Ice Lake" + select X86_P6_NOP + help + + Select this for 10th Gen Core processors in the Ice Lake family. + + Enables -march=icelake-client + +config MCASCADELAKE + bool "Intel Cascade Lake" + select X86_P6_NOP + help + + Select this for Xeon processors in the Cascade Lake family. + + Enables -march=cascadelake + +config MCOOPERLAKE + bool "Intel Cooper Lake" + depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000) + select X86_P6_NOP + help + + Select this for Xeon processors in the Cooper Lake family. + + Enables -march=cooperlake + +config MTIGERLAKE + bool "Intel Tiger Lake" + depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000) + select X86_P6_NOP + help + + Select this for third-generation 10 nm process processors in the Tiger Lake family. + + Enables -march=tigerlake + +config MSAPPHIRERAPIDS + bool "Intel Sapphire Rapids" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + select X86_P6_NOP + help + + Select this for fourth-generation 10 nm process processors in the Sapphire Rapids family. + + Enables -march=sapphirerapids + +config MROCKETLAKE + bool "Intel Rocket Lake" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + select X86_P6_NOP + help + + Select this for eleventh-generation processors in the Rocket Lake family. + + Enables -march=rocketlake + +config MALDERLAKE + bool "Intel Alder Lake" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + select X86_P6_NOP + help + + Select this for twelfth-generation processors in the Alder Lake family. + + Enables -march=alderlake + +config MRAPTORLAKE + bool "Intel Raptor Lake" + depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500) + select X86_P6_NOP + help + + Select this for thirteenth-generation processors in the Raptor Lake family. + + Enables -march=raptorlake + +config MMETEORLAKE + bool "Intel Meteor Lake" + depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500) + select X86_P6_NOP + help + + Select this for fourteenth-generation processors in the Meteor Lake family. + + Enables -march=meteorlake + +config MEMERALDRAPIDS + bool "Intel Emerald Rapids" + depends on (CC_IS_GCC && GCC_VERSION > 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500) + select X86_P6_NOP + help + + Select this for fifth-generation 10 nm process processors in the Emerald Rapids family. + + Enables -march=emeraldrapids + config GENERIC_CPU bool "Generic-x86-64" depends on X86_64 @@ -294,6 +596,50 @@ config GENERIC_CPU Generic x86-64 CPU. Run equally well on all x86-64 CPUs. +config GENERIC_CPU2 + bool "Generic-x86-64-v2" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + depends on X86_64 + help + Generic x86-64 CPU. + Run equally well on all x86-64 CPUs with min support of x86-64-v2. + +config GENERIC_CPU3 + bool "Generic-x86-64-v3" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + depends on X86_64 + help + Generic x86-64-v3 CPU with v3 instructions. + Run equally well on all x86-64 CPUs with min support of x86-64-v3. 
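With the GENERIC_CPU2/GENERIC_CPU3 options above (and GENERIC_CPU4, which follows), the kernel is compiled for a whole x86-64 microarchitecture level rather than one specific model, so it only runs on machines that implement every feature of that level (roughly: v2 adds SSE4.2/POPCNT/CMPXCHG16B, v3 adds AVX2/BMI1/BMI2/FMA, v4 adds the core AVX-512 subset). A quick way to check which level a given machine satisfies is the small user-space program below; it is only an illustration, not part of the patch, and it assumes GCC 12 or a similarly recent clang, which is when the level names became valid arguments to __builtin_cpu_supports():

/* Prints which x86-64 ISA levels this CPU provides (GCC 12+ / recent clang). */
#include <stdio.h>

int main(void)
{
	__builtin_cpu_init();
	printf("x86-64-v2: %s\n", __builtin_cpu_supports("x86-64-v2") ? "yes" : "no");
	printf("x86-64-v3: %s\n", __builtin_cpu_supports("x86-64-v3") ? "yes" : "no");
	printf("x86-64-v4: %s\n", __builtin_cpu_supports("x86-64-v4") ? "yes" : "no");
	return 0;
}

A kernel built with GENERIC_CPU3, for example, should only be installed on machines where the x86-64-v3 line reports "yes".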
+ +config GENERIC_CPU4 + bool "Generic-x86-64-v4" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + depends on X86_64 + help + Generic x86-64 CPU with v4 instructions. + Run equally well on all x86-64 CPUs with min support of x86-64-v4. + +config MNATIVE_INTEL + bool "Intel-Native optimizations autodetected by the compiler" + help + + Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects + the optimum settings to use based on your processor. Do NOT use this + for AMD CPUs. Intel Only! + + Enables -march=native + +config MNATIVE_AMD + bool "AMD-Native optimizations autodetected by the compiler" + help + + Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects + the optimum settings to use based on your processor. Do NOT use this + for Intel CPUs. AMD Only! + + Enables -march=native + endchoice config X86_GENERIC @@ -318,9 +664,17 @@ config X86_INTERNODE_CACHE_SHIFT config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || MPSC - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || MK8SSE3 || MK10 \ + || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER \ + || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MNEHALEM || MWESTMERE || MSILVERMONT \ + || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL \ + || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE \ + || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE \ + || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD || X86_GENERIC || GENERIC_CPU || GENERIC_CPU2 \ + || GENERIC_CPU3 || GENERIC_CPU4 default "4" if MELAN || M486SX || M486 || MGEODEGX1 - default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX + default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII \ + || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX config X86_F00F_BUG def_bool y @@ -332,15 +686,27 @@ config X86_INVD_BUG config X86_ALIGNMENT_16 def_bool y - depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486SX || M486 || MVIAC3_2 || MGEODEGX1 + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC \ + || M586 || M486SX || M486 || MVIAC3_2 || MGEODEGX1 config X86_INTEL_USERCOPY def_bool y - depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC \ + || MK8 || MK7 || MEFFICEON || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT \ + || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX \ + || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS \ + || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL config X86_USE_PPRO_CHECKSUM def_bool y - depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII 
|| MK7 || MK6 || MPENTIUM4 || MPENTIUMM \ + || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX \ + || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER \ + || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MNEHALEM \ + || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE \ + || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE \ + || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE \ + || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD # # P6_NOPs are a relatively minor optimization that require a family >= @@ -356,11 +722,22 @@ config X86_USE_PPRO_CHECKSUM config X86_P6_NOP def_bool y depends on X86_64 - depends on (MCORE2 || MPENTIUM4 || MPSC) + depends on (MCORE2 || MPENTIUM4 || MPSC || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT \ + || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE \ + || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE \ + || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS \ + || MNATIVE_INTEL) config X86_TSC def_bool y - depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64 + depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM \ + || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 \ + || MGEODE_LX || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER \ + || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MNEHALEM \ + || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL \ + || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE \ + || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS \ + || MNATIVE_INTEL || MNATIVE_AMD) || X86_64 config X86_HAVE_PAE def_bool y @@ -368,24 +745,43 @@ config X86_HAVE_PAE config X86_CMPXCHG64 def_bool y - depends on X86_HAVE_PAE || M586TSC || M586MMX || MK6 || MK7 + depends on X86_HAVE_PAE || M586TSC || M586MMX || MK6 || MK7 || MK8 || MK8SSE3 || MK10 \ + || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN \ + || MZEN2 || MZEN3 || MZEN4 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS \ + || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE \ + || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE \ + || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD # this should be set for all -march=.. options where the compiler # generates cmov. 
config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) + depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 \ + || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX || MK8SSE3 || MK10 \ + || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR \ + || MZEN || MZEN2 || MZEN3 || MZEN4 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT \ + || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX \ + || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS \ + || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD) config X86_MINIMUM_CPU_FAMILY int default "64" if X86_64 - default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8) + default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 \ + || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8 || MK8SSE3 \ + || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER \ + || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MNEHALEM || MWESTMERE || MSILVERMONT \ + || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL \ + || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE \ + || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MRAPTORLAKE \ + || MNATIVE_INTEL || MNATIVE_AMD) default "5" if X86_32 && X86_CMPXCHG64 default "4" config X86_DEBUGCTLMSR def_bool y - depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486SX || M486) && !UML + depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 \ + || M486SX || M486) && !UML config IA32_FEAT_CTL def_bool y diff --git a/arch/x86/Makefile b/arch/x86/Makefile index da8f3caf2781..adf396b23669 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -67,8 +67,8 @@ export BITS # # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 # -KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 +KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -mno-avx512f +KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-avx512f ifeq ($(CONFIG_X86_KERNEL_IBT),y) # @@ -149,21 +149,6 @@ else # Use -mskip-rax-setup if supported. 
KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) - # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) - cflags-$(CONFIG_MK8) += -march=k8 - cflags-$(CONFIG_MPSC) += -march=nocona - cflags-$(CONFIG_MCORE2) += -march=core2 - cflags-$(CONFIG_MATOM) += -march=atom - cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic - KBUILD_CFLAGS += $(cflags-y) - - rustflags-$(CONFIG_MK8) += -Ctarget-cpu=k8 - rustflags-$(CONFIG_MPSC) += -Ctarget-cpu=nocona - rustflags-$(CONFIG_MCORE2) += -Ctarget-cpu=core2 - rustflags-$(CONFIG_MATOM) += -Ctarget-cpu=atom - rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic - KBUILD_RUSTFLAGS += $(rustflags-y) - KBUILD_CFLAGS += -mno-red-zone KBUILD_CFLAGS += -mcmodel=kernel KBUILD_RUSTFLAGS += -Cno-redzone=y diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 94834c4b5e5e..81923b4afdf8 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -10,44 +10,3 @@ else align := -falign-functions=0 -falign-jumps=0 -falign-loops=0 endif -cflags-$(CONFIG_M486SX) += -march=i486 -cflags-$(CONFIG_M486) += -march=i486 -cflags-$(CONFIG_M586) += -march=i586 -cflags-$(CONFIG_M586TSC) += -march=i586 -cflags-$(CONFIG_M586MMX) += -march=pentium-mmx -cflags-$(CONFIG_M686) += -march=i686 -cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call tune,pentium2) -cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call tune,pentium3) -cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call tune,pentium3) -cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call tune,pentium4) -cflags-$(CONFIG_MK6) += -march=k6 -# Please note, that patches that add -march=athlon-xp and friends are pointless. -# They make zero difference whatsosever to performance at this time. -cflags-$(CONFIG_MK7) += -march=athlon -cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon) -cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align) -cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align) -cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) -cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) -cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align) -cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) -cflags-$(CONFIG_MVIAC7) += -march=i686 -cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) -cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \ - $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) - -# AMD Elan support -cflags-$(CONFIG_MELAN) += -march=i486 - -# Geode GX1 support -cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx -cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx) -# add at the end to overwrite eventual tuning options from earlier -# cpu entries -cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) - -# Bug fix for binutils: this option is required in order to keep -# binutils from generating NOPL instructions against our will. 
-ifneq ($(CONFIG_X86_P6_NOP),y) -cflags-y += $(call cc-option,-Wa$(comma)-mtune=generic32,) -endif diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index b40c462b4af3..c4e66e60d559 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -27,6 +27,7 @@ struct pci_sysdata { #if IS_ENABLED(CONFIG_VMD) struct pci_dev *vmd_dev; /* VMD Device if in Intel VMD domain */ #endif + struct pci_dev *nvme_remap_dev; /* AHCI Device if NVME remapped bus */ }; extern int pci_routeirq; @@ -70,6 +71,11 @@ static inline bool is_vmd(struct pci_bus *bus) #define is_vmd(bus) false #endif /* CONFIG_VMD */ +static inline bool is_nvme_remap(struct pci_bus *bus) +{ + return to_pci_sysdata(bus)->nvme_remap_dev != NULL; +} + /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the loader */ diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h index 75884d2cdec3..02c1386eb653 100644 --- a/arch/x86/include/asm/vermagic.h +++ b/arch/x86/include/asm/vermagic.h @@ -17,6 +17,54 @@ #define MODULE_PROC_FAMILY "586MMX " #elif defined CONFIG_MCORE2 #define MODULE_PROC_FAMILY "CORE2 " +#elif defined CONFIG_MNATIVE_INTEL +#define MODULE_PROC_FAMILY "NATIVE_INTEL " +#elif defined CONFIG_MNATIVE_AMD +#define MODULE_PROC_FAMILY "NATIVE_AMD " +#elif defined CONFIG_MNEHALEM +#define MODULE_PROC_FAMILY "NEHALEM " +#elif defined CONFIG_MWESTMERE +#define MODULE_PROC_FAMILY "WESTMERE " +#elif defined CONFIG_MSILVERMONT +#define MODULE_PROC_FAMILY "SILVERMONT " +#elif defined CONFIG_MGOLDMONT +#define MODULE_PROC_FAMILY "GOLDMONT " +#elif defined CONFIG_MGOLDMONTPLUS +#define MODULE_PROC_FAMILY "GOLDMONTPLUS " +#elif defined CONFIG_MSANDYBRIDGE +#define MODULE_PROC_FAMILY "SANDYBRIDGE " +#elif defined CONFIG_MIVYBRIDGE +#define MODULE_PROC_FAMILY "IVYBRIDGE " +#elif defined CONFIG_MHASWELL +#define MODULE_PROC_FAMILY "HASWELL " +#elif defined CONFIG_MBROADWELL +#define MODULE_PROC_FAMILY "BROADWELL " +#elif defined CONFIG_MSKYLAKE +#define MODULE_PROC_FAMILY "SKYLAKE " +#elif defined CONFIG_MSKYLAKEX +#define MODULE_PROC_FAMILY "SKYLAKEX " +#elif defined CONFIG_MCANNONLAKE +#define MODULE_PROC_FAMILY "CANNONLAKE " +#elif defined CONFIG_MICELAKE +#define MODULE_PROC_FAMILY "ICELAKE " +#elif defined CONFIG_MCASCADELAKE +#define MODULE_PROC_FAMILY "CASCADELAKE " +#elif defined CONFIG_MCOOPERLAKE +#define MODULE_PROC_FAMILY "COOPERLAKE " +#elif defined CONFIG_MTIGERLAKE +#define MODULE_PROC_FAMILY "TIGERLAKE " +#elif defined CONFIG_MSAPPHIRERAPIDS +#define MODULE_PROC_FAMILY "SAPPHIRERAPIDS " +#elif defined CONFIG_ROCKETLAKE +#define MODULE_PROC_FAMILY "ROCKETLAKE " +#elif defined CONFIG_MALDERLAKE +#define MODULE_PROC_FAMILY "ALDERLAKE " +#elif defined CONFIG_MRAPTORLAKE +#define MODULE_PROC_FAMILY "RAPTORLAKE " +#elif defined CONFIG_MMETEORLAKE +#define MODULE_PROC_FAMILY "METEORLAKE " +#elif defined CONFIG_MEMERALDRAPIDS +#define MODULE_PROC_FAMILY "EMERALDRAPIDS " #elif defined CONFIG_MATOM #define MODULE_PROC_FAMILY "ATOM " #elif defined CONFIG_M686 @@ -35,6 +83,32 @@ #define MODULE_PROC_FAMILY "K7 " #elif defined CONFIG_MK8 #define MODULE_PROC_FAMILY "K8 " +#elif defined CONFIG_MK8SSE3 +#define MODULE_PROC_FAMILY "K8SSE3 " +#elif defined CONFIG_MK10 +#define MODULE_PROC_FAMILY "K10 " +#elif defined CONFIG_MBARCELONA +#define MODULE_PROC_FAMILY "BARCELONA " +#elif defined CONFIG_MBOBCAT +#define MODULE_PROC_FAMILY "BOBCAT " +#elif defined 
CONFIG_MBULLDOZER +#define MODULE_PROC_FAMILY "BULLDOZER " +#elif defined CONFIG_MPILEDRIVER +#define MODULE_PROC_FAMILY "PILEDRIVER " +#elif defined CONFIG_MSTEAMROLLER +#define MODULE_PROC_FAMILY "STEAMROLLER " +#elif defined CONFIG_MJAGUAR +#define MODULE_PROC_FAMILY "JAGUAR " +#elif defined CONFIG_MEXCAVATOR +#define MODULE_PROC_FAMILY "EXCAVATOR " +#elif defined CONFIG_MZEN +#define MODULE_PROC_FAMILY "ZEN " +#elif defined CONFIG_MZEN2 +#define MODULE_PROC_FAMILY "ZEN2 " +#elif defined CONFIG_MZEN3 +#define MODULE_PROC_FAMILY "ZEN3 " +#elif defined CONFIG_MZEN4 +#define MODULE_PROC_FAMILY "ZEN4 " #elif defined CONFIG_MELAN #define MODULE_PROC_FAMILY "ELAN " #elif defined CONFIG_MCRUSOE diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index ddb798603201..7c20387d8202 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -723,12 +723,15 @@ int pci_ext_cfg_avail(void) return 0; } -#if IS_ENABLED(CONFIG_VMD) struct pci_dev *pci_real_dma_dev(struct pci_dev *dev) { +#if IS_ENABLED(CONFIG_VMD) if (is_vmd(dev->bus)) return to_pci_sysdata(dev->bus)->vmd_dev; +#endif + + if (is_nvme_remap(dev->bus)) + return to_pci_sysdata(dev->bus)->nvme_remap_dev; return dev; } -#endif diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 9bd57baa4b0b..efe818271cf7 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -7703,6 +7703,7 @@ MODULE_ALIAS("bfq-iosched"); static int __init bfq_init(void) { int ret; + char msg[60] = "BFQ I/O-scheduler: BFQ-CachyOS v6.8"; #ifdef CONFIG_BFQ_GROUP_IOSCHED ret = blkcg_policy_register(&blkcg_policy_bfq); @@ -7734,6 +7735,11 @@ static int __init bfq_init(void) if (ret) goto slab_kill; +#ifdef CONFIG_BFQ_GROUP_IOSCHED + strcat(msg, " (with cgroups support)"); +#endif + pr_info("%s", msg); + return 0; slab_kill: diff --git a/block/elevator.c b/block/elevator.c index 5ff093cb3cf8..1c93fe91b006 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -574,9 +574,19 @@ static struct elevator_type *elevator_get_default(struct request_queue *q) if (q->nr_hw_queues != 1 && !blk_mq_is_shared_tags(q->tag_set->flags)) +#if defined(CONFIG_CACHY) && defined(CONFIG_MQ_IOSCHED_KYBER) + return elevator_find_get(q, "kyber"); +#elif defined(CONFIG_CACHY) + return elevator_find_get(q, "mq-deadline"); +#else return NULL; +#endif +#if defined(CONFIG_CACHY) && defined(CONFIG_IOSCHED_BFQ) + return elevator_find_get(q, "bfq"); +#else return elevator_find_get(q, "mq-deadline"); +#endif } /* diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index df3fd6474bf2..4303eb5fe11b 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1547,7 +1547,7 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance) } #endif -static void ahci_remap_check(struct pci_dev *pdev, int bar, +static int ahci_remap_check(struct pci_dev *pdev, int bar, struct ahci_host_priv *hpriv) { int i; @@ -1560,7 +1560,7 @@ static void ahci_remap_check(struct pci_dev *pdev, int bar, pci_resource_len(pdev, bar) < SZ_512K || bar != AHCI_PCI_BAR_STANDARD || !(readl(hpriv->mmio + AHCI_VSCAP) & 1)) - return; + return 0; cap = readq(hpriv->mmio + AHCI_REMAP_CAP); for (i = 0; i < AHCI_MAX_REMAP; i++) { @@ -1575,18 +1575,11 @@ static void ahci_remap_check(struct pci_dev *pdev, int bar, } if (!hpriv->remapped_nvme) - return; - - dev_warn(&pdev->dev, "Found %u remapped NVMe devices.\n", - hpriv->remapped_nvme); - dev_warn(&pdev->dev, - "Switch your BIOS from RAID to AHCI mode to use them.\n"); + return 0; - /* - * Don't rely on the msi-x capability in the remap case, - * share the 
legacy interrupt across ahci and remapped devices. - */ - hpriv->flags |= AHCI_HFLAG_NO_MSI; + /* Abort probe, allowing intel-nvme-remap to step in when available */ + dev_info(&pdev->dev, "Device will be handled by intel-nvme-remap.\n"); + return -ENODEV; } static int ahci_get_irq_vector(struct ata_host *host, int port) @@ -1806,7 +1799,9 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar]; /* detect remapped nvme devices */ - ahci_remap_check(pdev, ahci_pci_bar, hpriv); + rc = ahci_remap_check(pdev, ahci_pci_bar, hpriv); + if (rc) + return rc; sysfs_add_file_to_group(&pdev->dev.kobj, &dev_attr_remapped_nvme.attr, diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 438c9e75a04d..1bbfeca5f01e 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -9,7 +9,6 @@ config X86_INTEL_PSTATE select ACPI_PROCESSOR if ACPI select ACPI_CPPC_LIB if X86_64 && ACPI && SCHED_MC_PRIO select CPU_FREQ_GOV_PERFORMANCE - select CPU_FREQ_GOV_SCHEDUTIL if SMP help This driver provides a P state for Intel core processors. The driver implements an internal governor and will become @@ -39,7 +38,6 @@ config X86_AMD_PSTATE depends on X86 && ACPI select ACPI_PROCESSOR select ACPI_CPPC_LIB if X86_64 - select CPU_FREQ_GOV_SCHEDUTIL if SMP help This driver adds a CPUFreq driver which utilizes a fine grain processor performance frequency control range instead of legacy diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig index 5f869eacd19a..90f51661a489 100644 --- a/drivers/extcon/Kconfig +++ b/drivers/extcon/Kconfig @@ -202,4 +202,11 @@ config EXTCON_RTK_TYPE_C The DHC (Digital Home Hub) RTD series SoC contains a type c module. This driver will detect the status of the type-c port. +config EXTCON_STEAMDECK + tristate "Steam Deck extcon support" + depends on MFD_STEAMDECK + help + Say Y here to enable support of USB Type C cable detection extcon + support on Steam Deck devices + endif diff --git a/drivers/extcon/Makefile b/drivers/extcon/Makefile index f779adb5e4c7..6e0569b21d2f 100644 --- a/drivers/extcon/Makefile +++ b/drivers/extcon/Makefile @@ -26,3 +26,4 @@ obj-$(CONFIG_EXTCON_USB_GPIO) += extcon-usb-gpio.o obj-$(CONFIG_EXTCON_USBC_CROS_EC) += extcon-usbc-cros-ec.o obj-$(CONFIG_EXTCON_USBC_TUSB320) += extcon-usbc-tusb320.o obj-$(CONFIG_EXTCON_RTK_TYPE_C) += extcon-rtk-type-c.o +obj-$(CONFIG_EXTCON_STEAMDECK) += extcon-steamdeck.o diff --git a/drivers/extcon/extcon-steamdeck.c b/drivers/extcon/extcon-steamdeck.c new file mode 100644 index 000000000000..74f190adc8ea --- /dev/null +++ b/drivers/extcon/extcon-steamdeck.c @@ -0,0 +1,180 @@ + +#include +#include +#include + +#define ACPI_STEAMDECK_NOTIFY_STATUS 0x80 + +/* 0 - port connected, 1 -port disconnected */ +#define ACPI_STEAMDECK_PORT_CONNECT BIT(0) +/* 0 - Upstream Facing Port, 1 - Downdstream Facing Port */ +#define ACPI_STEAMDECK_CUR_DATA_ROLE BIT(3) +/* + * Debouncing delay to allow negotiation process to settle. 2s value + * was arrived at via trial and error. 
+ */ +#define STEAMDECK_ROLE_SWITCH_DELAY (msecs_to_jiffies(2000)) + +struct steamdeck_extcon { + struct acpi_device *adev; + struct delayed_work role_work; + struct extcon_dev *edev; + struct device *dev; +}; + +static int steamdeck_read_pdcs(struct steamdeck_extcon *sd, unsigned long long *pdcs) +{ + acpi_status status; + + status = acpi_evaluate_integer(sd->adev->handle, "PDCS", NULL, pdcs); + if (ACPI_FAILURE(status)) { + dev_err(sd->dev, "PDCS evaluation failed: %s\n", + acpi_format_exception(status)); + return -EIO; + } + + return 0; +} + +static void steamdeck_usb_role_work(struct work_struct *work) +{ + struct steamdeck_extcon *sd = + container_of(work, struct steamdeck_extcon, role_work.work); + unsigned long long pdcs; + bool usb_host; + + if (steamdeck_read_pdcs(sd, &pdcs)) + return; + + /* + * We only care about these two + */ + pdcs &= ACPI_STEAMDECK_PORT_CONNECT | ACPI_STEAMDECK_CUR_DATA_ROLE; + + /* + * For "connect" events our role is determined by a bit in + * PDCS, for "disconnect" we switch to being a gadget + * unconditionally. The thinking for the latter is we don't + * want to start acting as a USB host until we get + * confirmation from the firmware that we are a USB host + */ + usb_host = (pdcs & ACPI_STEAMDECK_PORT_CONNECT) ? + pdcs & ACPI_STEAMDECK_CUR_DATA_ROLE : false; + + dev_dbg(sd->dev, "USB role is %s\n", usb_host ? "host" : "device"); + WARN_ON(extcon_set_state_sync(sd->edev, EXTCON_USB_HOST, + usb_host)); + +} + +static void steamdeck_notify(acpi_handle handle, u32 event, void *context) +{ + struct device *dev = context; + struct steamdeck_extcon *sd = dev_get_drvdata(dev); + unsigned long long pdcs; + unsigned long delay; + + switch (event) { + case ACPI_STEAMDECK_NOTIFY_STATUS: + if (steamdeck_read_pdcs(sd, &pdcs)) + return; + /* + * We process "disconnect" events immediately and + * "connect" events with a delay to give the HW time + * to settle. For example attaching USB hub (at least + * for HW used for testing) will generate intermediary + * event with "host" bit not set, followed by the one + * that does have it set. + */ + delay = (pdcs & ACPI_STEAMDECK_PORT_CONNECT) ? 
+ STEAMDECK_ROLE_SWITCH_DELAY : 0; + + queue_delayed_work(system_long_wq, &sd->role_work, delay); + break; + default: + dev_warn(dev, "Unsupported event [0x%x]\n", event); + } +} + +static void steamdeck_remove_notify_handler(void *data) +{ + struct steamdeck_extcon *sd = data; + + acpi_remove_notify_handler(sd->adev->handle, ACPI_DEVICE_NOTIFY, + steamdeck_notify); + cancel_delayed_work_sync(&sd->role_work); +} + +static const unsigned int steamdeck_extcon_cable[] = { + EXTCON_USB, + EXTCON_USB_HOST, + EXTCON_CHG_USB_SDP, + EXTCON_CHG_USB_CDP, + EXTCON_CHG_USB_DCP, + EXTCON_CHG_USB_ACA, + EXTCON_NONE, +}; + +static int steamdeck_extcon_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct steamdeck_extcon *sd; + acpi_status status; + int ret; + + sd = devm_kzalloc(dev, sizeof(*sd), GFP_KERNEL); + if (!sd) + return -ENOMEM; + + INIT_DELAYED_WORK(&sd->role_work, steamdeck_usb_role_work); + platform_set_drvdata(pdev, sd); + sd->adev = ACPI_COMPANION(dev->parent); + sd->dev = dev; + sd->edev = devm_extcon_dev_allocate(dev, steamdeck_extcon_cable); + if (IS_ERR(sd->edev)) + return PTR_ERR(sd->edev); + + ret = devm_extcon_dev_register(dev, sd->edev); + if (ret < 0) { + dev_err(dev, "Failed to register extcon device: %d\n", ret); + return ret; + } + + /* + * Set initial role value + */ + queue_delayed_work(system_long_wq, &sd->role_work, 0); + flush_delayed_work(&sd->role_work); + + status = acpi_install_notify_handler(sd->adev->handle, + ACPI_DEVICE_NOTIFY, + steamdeck_notify, + dev); + if (ACPI_FAILURE(status)) { + dev_err(dev, "Error installing ACPI notify handler\n"); + return -EIO; + } + + ret = devm_add_action_or_reset(dev, steamdeck_remove_notify_handler, + sd); + return ret; +} + +static const struct platform_device_id steamdeck_extcon_id_table[] = { + { .name = "steamdeck-extcon" }, + {} +}; +MODULE_DEVICE_TABLE(platform, steamdeck_extcon_id_table); + +static struct platform_driver steamdeck_extcon_driver = { + .probe = steamdeck_extcon_probe, + .driver = { + .name = "steamdeck-extcon", + }, + .id_table = steamdeck_extcon_id_table, +}; +module_platform_driver(steamdeck_extcon_driver); + +MODULE_AUTHOR("Andrey Smirnov "); +MODULE_DESCRIPTION("Steam Deck extcon driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 79827a6dcd7f..ee85a2352771 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -153,6 +153,7 @@ struct amdgpu_watchdog_timer */ extern int amdgpu_modeset; extern unsigned int amdgpu_vram_limit; +extern int amdgpu_ignore_min_pcap; extern int amdgpu_vis_vram_limit; extern int amdgpu_gart_size; extern int amdgpu_gtt_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 64b1bb240424..6000aa9690b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -132,6 +132,7 @@ enum AMDGPU_DEBUG_MASK { }; unsigned int amdgpu_vram_limit = UINT_MAX; +int amdgpu_ignore_min_pcap = 0; /* do not ignore by default */ int amdgpu_vis_vram_limit; int amdgpu_gart_size = -1; /* auto */ int amdgpu_gtt_size = -1; /* auto */ @@ -241,6 +242,15 @@ struct amdgpu_watchdog_timer amdgpu_watchdog_timer = { .period = 0x0, /* default to 0x0 (timeout disable) */ }; +/** + * DOC: ignore_min_pcap (int) + * Ignore the minimum power cap. + * Useful on graphics cards where the minimum power cap is very high. + * The default is 0 (Do not ignore). 
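Because ignore_min_pcap is registered below with module_param_named(..., 0600), it is not only a boot-time option: besides passing amdgpu.ignore_min_pcap=1 on the kernel command line (or ignore_min_pcap=1 when amdgpu is loaded as a module), root should also be able to toggle it at run time through /sys/module/amdgpu/parameters/ignore_min_pcap, the standard sysfs path for module parameters. Whether the power-cap limits are re-evaluated immediately after such a change depends on when the pm code paths re-read the flag, so treat the runtime toggle as a convenience rather than a guarantee.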
+ */ +MODULE_PARM_DESC(ignore_min_pcap, "Ignore the minimum power cap"); +module_param_named(ignore_min_pcap, amdgpu_ignore_min_pcap, int, 0600); + /** * DOC: vramlimit (int) * Restrict the total amount of VRAM in MiB for testing. The default is 0 (Use full VRAM). diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 2e4911050cc5..f5805d0f8f01 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -436,8 +436,6 @@ struct amdgpu_mode_info { struct drm_property *regamma_tf_property; }; -#define AMDGPU_MAX_BL_LEVEL 0xFF - struct amdgpu_backlight_privdata { struct amdgpu_encoder *encoder; uint8_t negative; diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 7672abe6c140..568a816bcf58 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -39,6 +39,10 @@ #include #include "bif/bif_4_1_d.h" + +/* Maximum backlight level. */ +#define AMDGPU_ATOM_MAX_BL_LEVEL 0xFF + u8 amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev) { @@ -127,8 +131,8 @@ static u8 amdgpu_atombios_encoder_backlight_level(struct backlight_device *bd) /* Convert brightness to hardware level */ if (bd->props.brightness < 0) level = 0; - else if (bd->props.brightness > AMDGPU_MAX_BL_LEVEL) - level = AMDGPU_MAX_BL_LEVEL; + else if (bd->props.brightness > AMDGPU_ATOM_MAX_BL_LEVEL) + level = AMDGPU_ATOM_MAX_BL_LEVEL; else level = bd->props.brightness; @@ -198,7 +202,7 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode } memset(&props, 0, sizeof(props)); - props.max_brightness = AMDGPU_MAX_BL_LEVEL; + props.max_brightness = AMDGPU_ATOM_MAX_BL_LEVEL; props.type = BACKLIGHT_RAW; snprintf(bl_name, sizeof(bl_name), "amdgpu_bl%d", dev->primary->index); diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 901d1961b739..05c49141f580 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -51,4 +51,10 @@ config DRM_AMD_SECURE_DISPLAY This option enables the calculation of crc of specific region via debugfs. Cooperate with specific DMCU FW. +config AMD_PRIVATE_COLOR + bool "Enable KMS color management by AMD for AMD" + default n + help + This option extends the KMS color management API with AMD driver-specific properties to enhance the color management support on AMD Steam Deck. + endmenu diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 718e533ab46d..0e57b9c481aa 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -153,6 +153,9 @@ MODULE_FIRMWARE(FIRMWARE_DCN_35_DMUB); /* Number of bytes in PSP footer for firmware. */ #define PSP_FOOTER_BYTES 0x100 +/* Maximum backlight level. 
*/ +#define AMDGPU_MAX_BL_LEVEL 0xFFF + /** * DOC: overview * @@ -4072,7 +4075,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) return r; } -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR if (amdgpu_dm_create_color_properties(adev)) return -ENOMEM; #endif @@ -4087,7 +4090,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) return 0; } -#define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 12 +#define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 0 #define AMDGPU_DM_DEFAULT_MAX_BACKLIGHT 255 #define AUX_BL_DEFAULT_TRANSITION_TIME_MS 50 @@ -4105,11 +4108,27 @@ static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm, amdgpu_acpi_get_backlight_caps(&caps); if (caps.caps_valid) { dm->backlight_caps[bl_idx].caps_valid = true; + + printk(KERN_NOTICE"VLV Successfully queried backlight range over ACPI: %d %d\n", + (int) caps.min_input_signal, (int) caps.max_input_signal); + + if ( caps.min_input_signal != AMDGPU_DM_DEFAULT_MIN_BACKLIGHT || + caps.max_input_signal != AMDGPU_DM_DEFAULT_MAX_BACKLIGHT ) + { + caps.min_input_signal = AMDGPU_DM_DEFAULT_MIN_BACKLIGHT; + caps.max_input_signal = AMDGPU_DM_DEFAULT_MAX_BACKLIGHT; + + printk(KERN_NOTICE"VLV OVERRIDE backlight range: %d %d\n", + (int) caps.min_input_signal, (int) caps.max_input_signal); + } + if (caps.aux_support) return; dm->backlight_caps[bl_idx].min_input_signal = caps.min_input_signal; dm->backlight_caps[bl_idx].max_input_signal = caps.max_input_signal; } else { + printk(KERN_NOTICE"VLV ACPI does not provide backlight range, using defaults: %d %d\n", + AMDGPU_DM_DEFAULT_MIN_BACKLIGHT, AMDGPU_DM_DEFAULT_MAX_BACKLIGHT); dm->backlight_caps[bl_idx].min_input_signal = AMDGPU_DM_DEFAULT_MIN_BACKLIGHT; dm->backlight_caps[bl_idx].max_input_signal = @@ -4119,6 +4138,9 @@ static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm, if (dm->backlight_caps[bl_idx].aux_support) return; + printk(KERN_NOTICE"VLV Kernel built without ACPI. 
using backlight range defaults: %d %d\n", + AMDGPU_DM_DEFAULT_MIN_BACKLIGHT, AMDGPU_DM_DEFAULT_MAX_BACKLIGHT); + dm->backlight_caps[bl_idx].min_input_signal = AMDGPU_DM_DEFAULT_MIN_BACKLIGHT; dm->backlight_caps[bl_idx].max_input_signal = AMDGPU_DM_DEFAULT_MAX_BACKLIGHT; #endif @@ -4150,7 +4172,7 @@ static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *c if (!get_brightness_range(caps, &min, &max)) return brightness; - // Rescale 0..255 to min..max + // Rescale 0..AMDGPU_MAX_BL_LEVEL to min..max return min + DIV_ROUND_CLOSEST((max - min) * brightness, AMDGPU_MAX_BL_LEVEL); } @@ -4165,7 +4187,7 @@ static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *cap if (brightness < min) return 0; - // Rescale min..max to 0..255 + // Rescale min..max to 0..AMDGPU_MAX_BL_LEVEL return DIV_ROUND_CLOSEST(AMDGPU_MAX_BL_LEVEL * (brightness - min), max - min); } @@ -7455,7 +7477,8 @@ static int amdgpu_dm_connector_get_modes(struct drm_connector *connector) drm_add_modes_noedid(connector, 1920, 1080); } else { amdgpu_dm_connector_ddc_get_modes(connector, edid); - amdgpu_dm_connector_add_common_modes(encoder, connector); + if (connector->connector_type != DRM_MODE_CONNECTOR_eDP) + amdgpu_dm_connector_add_common_modes(encoder, connector); amdgpu_dm_connector_add_freesync_modes(connector, edid); } amdgpu_dm_fbc_init(connector); @@ -8211,7 +8234,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, int planes_count = 0, vpos, hpos; unsigned long flags; u32 target_vblank, last_flip_vblank; - bool vrr_active = amdgpu_dm_crtc_vrr_active(acrtc_state); + bool vrr_active = true;//amdgpu_dm_crtc_vrr_active(acrtc_state); bool cursor_update = false; bool pflip_present = false; bool dirty_rects_changed = false; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index c87b64e464ed..6fe07243adc3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -97,7 +97,7 @@ static inline struct fixed31_32 amdgpu_dm_fixpt_from_s3132(__u64 x) return val; } -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR /* Pre-defined Transfer Functions (TF) * * AMD driver supports pre-defined mathematical functions for transferring diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 6e715ef3a556..11c7199ec3b3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -290,7 +290,7 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) } #endif -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR /** * dm_crtc_additional_color_mgmt - enable additional color properties * @crtc: DRM CRTC @@ -372,7 +372,7 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { #if defined(CONFIG_DEBUG_FS) .late_register = amdgpu_dm_crtc_late_register, #endif -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR .atomic_set_property = amdgpu_dm_atomic_crtc_set_property, .atomic_get_property = amdgpu_dm_atomic_crtc_get_property, #endif @@ -551,7 +551,7 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR dm_crtc_additional_color_mgmt(&acrtc->base); #endif return 0; diff --git 
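The brightness conversion helpers in the amdgpu_dm.c hunk above now rescale a 0..AMDGPU_MAX_BL_LEVEL (0xFFF) user value onto the panel's min..max input signal rather than 0..255. A small stand-alone sketch of that arithmetic, using the overridden defaults min=0, max=255 and a simplified DIV_ROUND_CLOSEST (an illustration, not code from the patch):

#include <stdio.h>

#define AMDGPU_MAX_BL_LEVEL	0xFFF	/* 4095, as defined above */
/* simplified for non-negative operands */
#define DIV_ROUND_CLOSEST(x, d)	(((x) + ((d) / 2)) / (d))

static unsigned int from_user(unsigned int min, unsigned int max,
			      unsigned int brightness)
{
	return min + DIV_ROUND_CLOSEST((max - min) * brightness,
				       AMDGPU_MAX_BL_LEVEL);
}

int main(void)
{
	/* a mid-scale request of 2048 out of 4095 maps to 128 out of 255 */
	printf("%u\n", from_user(0, 255, 2048));
	return 0;
}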
a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 8a4c40b4c27e..779880c64575 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1468,7 +1468,7 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane, drm_atomic_helper_plane_destroy_state(plane, state); } -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR static void dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, struct drm_plane *plane) @@ -1659,7 +1659,7 @@ static const struct drm_plane_funcs dm_plane_funcs = { .atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state, .atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state, .format_mod_supported = amdgpu_dm_plane_format_mod_supported, -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR .atomic_set_property = dm_atomic_plane_set_property, .atomic_get_property = dm_atomic_plane_get_property, #endif @@ -1742,7 +1742,7 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, drm_plane_helper_add(plane, &dm_plane_helper_funcs); -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR dm_atomic_plane_attach_color_mgmt_properties(dm, plane); #endif /* Create (reset) the plane state */ diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c index 5b0bc7f6a188..5a8b0b7c1907 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c @@ -35,6 +35,8 @@ #define DC_LOGGER_INIT(logger) +static const uint8_t DP_SINK_BRANCH_DEV_NAME_KT50X0[] = "KT50X0!"; + static uint32_t get_tmds_output_pixel_clock_100hz(const struct dc_crtc_timing *timing) { @@ -278,6 +280,15 @@ static bool dp_validate_mode_timing( timing->v_addressable == (uint32_t) 480) return true; + if (link->ctx->dce_version == DCN_VERSION_3_01 && + link->dpcd_caps.sink_dev_id == DP_BRANCH_DEVICE_ID_0060AD && + memcmp(&link->dpcd_caps.branch_dev_name, + DP_SINK_BRANCH_DEV_NAME_KT50X0, + sizeof(link->dpcd_caps.branch_dev_name)) == 0) { + if (timing->pix_clk_100hz / 10 >= (uint32_t) 1200000) + return false; /* KT50X0 does not support Pxl clock >= 1200MHz */ + } + link_setting = dp_get_verified_link_cap(link); /* TODO: DYNAMIC_VALIDATION needs to be implemented */ diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index b4698f985600..6a8e641c5130 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -3034,6 +3034,9 @@ static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev, struct device_attribute *attr, char *buf) { + if (amdgpu_ignore_min_pcap) + return sysfs_emit(buf, "%i\n", 0); + return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_MIN); } diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 0ad947df777a..7b82e3ef7c91 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2695,7 +2695,10 @@ int smu_get_power_limit(void *handle, *limit = smu->max_power_limit; break; case SMU_PPT_LIMIT_MIN: - *limit = smu->min_power_limit; + if (amdgpu_ignore_min_pcap) + *limit = 0; + else + *limit = smu->min_power_limit; break; default: return -EINVAL; @@ -2719,7 +2722,14 @@ static int smu_set_power_limit(void *handle, uint32_t limit) if (smu->ppt_funcs->set_power_limit) return 
smu->ppt_funcs->set_power_limit(smu, limit_type, limit); - if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) { + if (amdgpu_ignore_min_pcap) { + if ((limit > smu->max_power_limit)) { + dev_err(smu->adev->dev, + "New power limit (%d) is over the max allowed %d\n", + limit, smu->max_power_limit); + return -EINVAL; + } + } else if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) { dev_err(smu->adev->dev, "New power limit (%d) is out of range [%d,%d]\n", limit, smu->min_power_limit, smu->max_power_limit); diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index aa93129c3397..426bbee2d9f5 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -421,6 +421,13 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* Valve Steam Deck */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Valve"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Galileo"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* VIOS LTH17 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "VIOS"), diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index a608264da87d..cf0fc2ce1017 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1974,6 +1974,17 @@ config SENSORS_SCH5636 This driver can also be built as a module. If so, the module will be called sch5636. +config SENSORS_STEAMDECK + tristate "Steam Deck EC sensors" + depends on MFD_STEAMDECK + help + If you say yes here you get support for the hardware + monitoring features exposed by EC firmware on Steam Deck + devices + + This driver can also be built as a module. If so, the module + will be called steamdeck-hwmon. 
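With SENSORS_STEAMDECK enabled, the EC readings surface through the standard hwmon sysfs interface (the chip registers as "steamdeck_hwmon" in the driver that follows), so plain sysfs reads are enough. A minimal user-space sketch; the hwmon index and path are assumptions and vary per machine:

#include <stdio.h>

int main(void)
{
	/* hwmon3 is an assumption; match it against .../hwmonN/name */
	FILE *f = fopen("/sys/class/hwmon/hwmon3/fan1_input", "r");
	long rpm = 0;

	if (!f) {
		perror("fan1_input");
		return 1;
	}
	if (fscanf(f, "%ld", &rpm) == 1)
		printf("fan: %ld RPM\n", rpm);
	fclose(f);
	return 0;
}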
+ config SENSORS_STTS751 tristate "ST Microelectronics STTS751" depends on I2C diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 47be39af5c03..b1cc9112a0a6 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -200,6 +200,7 @@ obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47m1.o obj-$(CONFIG_SENSORS_SMSC47M192)+= smsc47m192.o obj-$(CONFIG_SENSORS_SPARX5) += sparx5-temp.o +obj-$(CONFIG_SENSORS_STEAMDECK) += steamdeck-hwmon.o obj-$(CONFIG_SENSORS_STTS751) += stts751.o obj-$(CONFIG_SENSORS_SY7636A) += sy7636a-hwmon.o obj-$(CONFIG_SENSORS_AMC6821) += amc6821.o diff --git a/drivers/hwmon/steamdeck-hwmon.c b/drivers/hwmon/steamdeck-hwmon.c new file mode 100644 index 000000000000..9d0a5471b181 --- /dev/null +++ b/drivers/hwmon/steamdeck-hwmon.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Steam Deck EC sensors driver + * + * Copyright (C) 2021-2022 Valve Corporation + */ + +#include +#include +#include + +#define STEAMDECK_HWMON_NAME "steamdeck-hwmon" + +struct steamdeck_hwmon { + struct acpi_device *adev; +}; + +static long +steamdeck_hwmon_get(struct steamdeck_hwmon *sd, const char *method) +{ + unsigned long long val; + if (ACPI_FAILURE(acpi_evaluate_integer(sd->adev->handle, + (char *)method, NULL, &val))) + return -EIO; + + return val; +} + +static int +steamdeck_hwmon_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *out) +{ + struct steamdeck_hwmon *sd = dev_get_drvdata(dev); + + switch (type) { + case hwmon_curr: + if (attr != hwmon_curr_input) + return -EOPNOTSUPP; + + *out = steamdeck_hwmon_get(sd, "PDAM"); + if (*out < 0) + return *out; + break; + case hwmon_in: + if (attr != hwmon_in_input) + return -EOPNOTSUPP; + + *out = steamdeck_hwmon_get(sd, "PDVL"); + if (*out < 0) + return *out; + break; + case hwmon_temp: + if (attr != hwmon_temp_input) + return -EOPNOTSUPP; + + *out = steamdeck_hwmon_get(sd, "BATT"); + if (*out < 0) + return *out; + /* + * Assuming BATT returns deg C we need to mutiply it + * by 1000 to convert to mC + */ + *out *= 1000; + break; + case hwmon_fan: + switch (attr) { + case hwmon_fan_input: + *out = steamdeck_hwmon_get(sd, "FANR"); + if (*out < 0) + return *out; + break; + case hwmon_fan_target: + *out = steamdeck_hwmon_get(sd, "FSSR"); + if (*out < 0) + return *out; + break; + case hwmon_fan_fault: + *out = steamdeck_hwmon_get(sd, "FANC"); + if (*out < 0) + return *out; + /* + * FANC (Fan check): + * 0: Abnormal + * 1: Normal + */ + *out = !*out; + break; + default: + return -EOPNOTSUPP; + } + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int +steamdeck_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, const char **str) +{ + switch (type) { + /* + * These two aren't, strictly speaking, measured. EC + * firmware just reports what PD negotiation resulted + * in. 
+ */ + case hwmon_curr: + *str = "PD Contract Current"; + break; + case hwmon_in: + *str = "PD Contract Voltage"; + break; + case hwmon_temp: + *str = "Battery Temp"; + break; + case hwmon_fan: + *str = "System Fan"; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int +steamdeck_hwmon_write(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long val) +{ + struct steamdeck_hwmon *sd = dev_get_drvdata(dev); + + if (type != hwmon_fan || + attr != hwmon_fan_target) + return -EOPNOTSUPP; + + val = clamp_val(val, 0, 7300); + + if (ACPI_FAILURE(acpi_execute_simple_method(sd->adev->handle, + "FANS", val))) + return -EIO; + + return 0; +} + +static umode_t +steamdeck_hwmon_is_visible(const void *data, enum hwmon_sensor_types type, + u32 attr, int channel) +{ + if (type == hwmon_fan && + attr == hwmon_fan_target) + return 0644; + + return 0444; +} + +static const struct hwmon_channel_info *steamdeck_hwmon_info[] = { + HWMON_CHANNEL_INFO(in, + HWMON_I_INPUT | HWMON_I_LABEL), + HWMON_CHANNEL_INFO(curr, + HWMON_C_INPUT | HWMON_C_LABEL), + HWMON_CHANNEL_INFO(temp, + HWMON_T_INPUT | HWMON_T_LABEL), + HWMON_CHANNEL_INFO(fan, + HWMON_F_INPUT | HWMON_F_LABEL | + HWMON_F_TARGET | HWMON_F_FAULT), + NULL +}; + +static const struct hwmon_ops steamdeck_hwmon_ops = { + .is_visible = steamdeck_hwmon_is_visible, + .read = steamdeck_hwmon_read, + .read_string = steamdeck_hwmon_read_string, + .write = steamdeck_hwmon_write, +}; + +static const struct hwmon_chip_info steamdeck_hwmon_chip_info = { + .ops = &steamdeck_hwmon_ops, + .info = steamdeck_hwmon_info, +}; + + +static ssize_t +steamdeck_hwmon_simple_store(struct device *dev, const char *buf, size_t count, + const char *method, + unsigned long upper_limit) +{ + struct steamdeck_hwmon *sd = dev_get_drvdata(dev); + unsigned long value; + + if (kstrtoul(buf, 10, &value) || value >= upper_limit) + return -EINVAL; + + if (ACPI_FAILURE(acpi_execute_simple_method(sd->adev->handle, + (char *)method, value))) + return -EIO; + + return count; +} + +static ssize_t +steamdeck_hwmon_simple_show(struct device *dev, char *buf, + const char *method) +{ + struct steamdeck_hwmon *sd = dev_get_drvdata(dev); + unsigned long value; + + value = steamdeck_hwmon_get(sd, method); + if (value < 0) + return value; + + return sprintf(buf, "%ld\n", value); +} + +#define STEAMDECK_HWMON_ATTR_RW(_name, _set_method, _get_method, \ + _upper_limit) \ + static ssize_t _name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ + { \ + return steamdeck_hwmon_simple_show(dev, buf, \ + _get_method); \ + } \ + static ssize_t _name##_store(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t count) \ + { \ + return steamdeck_hwmon_simple_store(dev, buf, count, \ + _set_method, \ + _upper_limit); \ + } \ + static DEVICE_ATTR_RW(_name) + +STEAMDECK_HWMON_ATTR_RW(max_battery_charge_level, "FCBL", "SFBL", 101); +STEAMDECK_HWMON_ATTR_RW(max_battery_charge_rate, "CHGR", "GCHR", 101); + +static struct attribute *steamdeck_hwmon_attributes[] = { + &dev_attr_max_battery_charge_level.attr, + &dev_attr_max_battery_charge_rate.attr, + NULL +}; + +static const struct attribute_group steamdeck_hwmon_group = { + .attrs = steamdeck_hwmon_attributes, +}; + +static const struct attribute_group *steamdeck_hwmon_groups[] = { + &steamdeck_hwmon_group, + NULL +}; + +static int steamdeck_hwmon_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct steamdeck_hwmon *sd; + struct device *hwmon; + + 
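For reference, this is roughly what the STEAMDECK_HWMON_ATTR_RW() invocation for max_battery_charge_level earlier in this file expands to: writes are parsed with kstrtoul(), rejected at or above the limit of 101 (so 0..100 is accepted) and forwarded to the EC method "FCBL", while reads go through "SFBL". A preprocessor sketch, not additional code in the patch:

static ssize_t max_battery_charge_level_show(struct device *dev,
					     struct device_attribute *attr,
					     char *buf)
{
	return steamdeck_hwmon_simple_show(dev, buf, "SFBL");
}

static ssize_t max_battery_charge_level_store(struct device *dev,
					      struct device_attribute *attr,
					      const char *buf, size_t count)
{
	return steamdeck_hwmon_simple_store(dev, buf, count, "FCBL", 101);
}

static DEVICE_ATTR_RW(max_battery_charge_level);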
sd = devm_kzalloc(dev, sizeof(*sd), GFP_KERNEL); + if (!sd) + return -ENOMEM; + + sd->adev = ACPI_COMPANION(dev->parent); + hwmon = devm_hwmon_device_register_with_info(dev, + "steamdeck_hwmon", + sd, + &steamdeck_hwmon_chip_info, + steamdeck_hwmon_groups); + if (IS_ERR(hwmon)) { + dev_err(dev, "Failed to register HWMON device"); + return PTR_ERR(hwmon); + } + + return 0; +} + +static const struct platform_device_id steamdeck_hwmon_id_table[] = { + { .name = STEAMDECK_HWMON_NAME }, + {} +}; +MODULE_DEVICE_TABLE(platform, steamdeck_hwmon_id_table); + +static struct platform_driver steamdeck_hwmon_driver = { + .probe = steamdeck_hwmon_probe, + .driver = { + .name = STEAMDECK_HWMON_NAME, + }, + .id_table = steamdeck_hwmon_id_table, +}; +module_platform_driver(steamdeck_hwmon_driver); + +MODULE_AUTHOR("Andrey Smirnov "); +MODULE_DESCRIPTION("Steam Deck EC sensors driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 28eb48dd5b32..1cf4c700b108 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -230,6 +230,15 @@ config I2C_CHT_WC combined with a FUSB302 Type-C port-controller as such it is advised to also select CONFIG_TYPEC_FUSB302=m. +config I2C_NCT6775 + tristate "Nuvoton NCT6775 and compatible SMBus controller" + help + If you say yes to this option, support will be included for the + Nuvoton NCT6775 and compatible SMBus controllers. + + This driver can also be built as a module. If so, the module + will be called i2c-nct6775. + config I2C_NFORCE2 tristate "Nvidia nForce2, nForce3 and nForce4" depends on PCI diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index aa0ee8ecd6f2..020714113e9a 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_I2C_CHT_WC) += i2c-cht-wc.o obj-$(CONFIG_I2C_I801) += i2c-i801.o obj-$(CONFIG_I2C_ISCH) += i2c-isch.o obj-$(CONFIG_I2C_ISMT) += i2c-ismt.o +obj-$(CONFIG_I2C_NCT6775) += i2c-nct6775.o obj-$(CONFIG_I2C_NFORCE2) += i2c-nforce2.o obj-$(CONFIG_I2C_NFORCE2_S4985) += i2c-nforce2-s4985.o obj-$(CONFIG_I2C_NVIDIA_GPU) += i2c-nvidia-gpu.o diff --git a/drivers/i2c/busses/i2c-nct6775.c b/drivers/i2c/busses/i2c-nct6775.c new file mode 100644 index 000000000000..e919d1e10c51 --- /dev/null +++ b/drivers/i2c/busses/i2c-nct6775.c @@ -0,0 +1,648 @@ +/* + * i2c-nct6775 - Driver for the SMBus master functionality of + * Nuvoton NCT677x Super-I/O chips + * + * Copyright (C) 2019 Adam Honse + * + * Derived from nct6775 hwmon driver + * Copyright (C) 2012 Guenter Roeck + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRVNAME "i2c-nct6775" + +/* Nuvoton SMBus address offsets */ +#define SMBHSTDAT (0 + nuvoton_nct6793d_smba) +#define SMBBLKSZ (1 + nuvoton_nct6793d_smba) +#define SMBHSTCMD (2 + nuvoton_nct6793d_smba) +#define SMBHSTIDX (3 + nuvoton_nct6793d_smba) //Index field is the Command field on other controllers +#define SMBHSTCTL (4 + nuvoton_nct6793d_smba) +#define SMBHSTADD (5 + nuvoton_nct6793d_smba) +#define SMBHSTERR (9 + nuvoton_nct6793d_smba) +#define SMBHSTSTS (0xE + nuvoton_nct6793d_smba) + +/* Command register */ +#define NCT6793D_READ_BYTE 0 +#define NCT6793D_READ_WORD 1 +#define NCT6793D_READ_BLOCK 2 +#define NCT6793D_BLOCK_WRITE_READ_PROC_CALL 3 +#define NCT6793D_PROC_CALL 4 +#define NCT6793D_WRITE_BYTE 8 +#define NCT6793D_WRITE_WORD 9 +#define NCT6793D_WRITE_BLOCK 10 + +/* Control register */ +#define NCT6793D_MANUAL_START 128 +#define NCT6793D_SOFT_RESET 64 + +/* Error register */ +#define NCT6793D_NO_ACK 32 + +/* Status register */ +#define NCT6793D_FIFO_EMPTY 1 +#define NCT6793D_FIFO_FULL 2 +#define NCT6793D_MANUAL_ACTIVE 4 + +#define NCT6775_LD_SMBUS 0x0B + +/* Other settings */ +#define MAX_RETRIES 400 + +enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792, nct6793, + nct6795, nct6796, nct6798 }; + +struct nct6775_sio_data { + int sioreg; + enum kinds kind; +}; + +/* used to set data->name = nct6775_device_names[data->sio_kind] */ +static const char * const nct6775_device_names[] = { + "nct6106", + "nct6775", + "nct6776", + "nct6779", + "nct6791", + "nct6792", + "nct6793", + "nct6795", + "nct6796", + "nct6798", +}; + +static const char * const nct6775_sio_names[] __initconst = { + "NCT6106D", + "NCT6775F", + "NCT6776D/F", + "NCT6779D", + "NCT6791D", + "NCT6792D", + "NCT6793D", + "NCT6795D", + "NCT6796D", + "NCT6798D", +}; + +#define SIO_REG_LDSEL 0x07 /* Logical device select */ +#define SIO_REG_DEVID 0x20 /* Device ID (2 bytes) */ +#define SIO_REG_SMBA 0x62 /* SMBus base address register */ + +#define SIO_NCT6106_ID 0xc450 +#define SIO_NCT6775_ID 0xb470 +#define SIO_NCT6776_ID 0xc330 +#define SIO_NCT6779_ID 0xc560 +#define SIO_NCT6791_ID 0xc800 +#define SIO_NCT6792_ID 0xc910 +#define SIO_NCT6793_ID 0xd120 +#define SIO_NCT6795_ID 0xd350 +#define SIO_NCT6796_ID 0xd420 +#define SIO_NCT6798_ID 0xd428 +#define SIO_ID_MASK 0xFFF0 + +static inline void +superio_outb(int ioreg, int reg, int val) +{ + outb(reg, ioreg); + outb(val, ioreg + 1); +} + +static inline int +superio_inb(int ioreg, int reg) +{ + outb(reg, ioreg); + return inb(ioreg + 1); +} + +static inline void +superio_select(int ioreg, int ld) +{ + outb(SIO_REG_LDSEL, ioreg); + outb(ld, ioreg + 1); +} + +static inline int +superio_enter(int ioreg) +{ + /* + * Try to reserve and for exclusive access. 
+ */ + if (!request_muxed_region(ioreg, 2, DRVNAME)) + return -EBUSY; + + outb(0x87, ioreg); + outb(0x87, ioreg); + + return 0; +} + +static inline void +superio_exit(int ioreg) +{ + outb(0xaa, ioreg); + outb(0x02, ioreg); + outb(0x02, ioreg + 1); + release_region(ioreg, 2); +} + +/* + * ISA constants + */ + +#define IOREGION_ALIGNMENT (~7) +#define IOREGION_LENGTH 2 +#define ADDR_REG_OFFSET 0 +#define DATA_REG_OFFSET 1 + +#define NCT6775_REG_BANK 0x4E +#define NCT6775_REG_CONFIG 0x40 + +static struct i2c_adapter *nct6775_adapter; + +struct i2c_nct6775_adapdata { + unsigned short smba; +}; + +/* Return negative errno on error. */ +static s32 nct6775_access(struct i2c_adapter * adap, u16 addr, + unsigned short flags, char read_write, + u8 command, int size, union i2c_smbus_data * data) +{ + struct i2c_nct6775_adapdata *adapdata = i2c_get_adapdata(adap); + unsigned short nuvoton_nct6793d_smba = adapdata->smba; + int i, len, cnt; + union i2c_smbus_data tmp_data; + int timeout = 0; + + tmp_data.word = 0; + cnt = 0; + len = 0; + + outb_p(NCT6793D_SOFT_RESET, SMBHSTCTL); + + switch (size) { + case I2C_SMBUS_QUICK: + outb_p((addr << 1) | read_write, + SMBHSTADD); + break; + case I2C_SMBUS_BYTE_DATA: + tmp_data.byte = data->byte; + fallthrough; + case I2C_SMBUS_BYTE: + outb_p((addr << 1) | read_write, + SMBHSTADD); + outb_p(command, SMBHSTIDX); + if (read_write == I2C_SMBUS_WRITE) { + outb_p(tmp_data.byte, SMBHSTDAT); + outb_p(NCT6793D_WRITE_BYTE, SMBHSTCMD); + } + else { + outb_p(NCT6793D_READ_BYTE, SMBHSTCMD); + } + break; + case I2C_SMBUS_WORD_DATA: + outb_p((addr << 1) | read_write, + SMBHSTADD); + outb_p(command, SMBHSTIDX); + if (read_write == I2C_SMBUS_WRITE) { + outb_p(data->word & 0xff, SMBHSTDAT); + outb_p((data->word & 0xff00) >> 8, SMBHSTDAT); + outb_p(NCT6793D_WRITE_WORD, SMBHSTCMD); + } + else { + outb_p(NCT6793D_READ_WORD, SMBHSTCMD); + } + break; + case I2C_SMBUS_BLOCK_DATA: + outb_p((addr << 1) | read_write, + SMBHSTADD); + outb_p(command, SMBHSTIDX); + if (read_write == I2C_SMBUS_WRITE) { + len = data->block[0]; + if (len == 0 || len > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + outb_p(len, SMBBLKSZ); + + cnt = 1; + if (len >= 4) { + for (i = cnt; i <= 4; i++) { + outb_p(data->block[i], SMBHSTDAT); + } + + len -= 4; + cnt += 4; + } + else { + for (i = cnt; i <= len; i++ ) { + outb_p(data->block[i], SMBHSTDAT); + } + + len = 0; + } + + outb_p(NCT6793D_WRITE_BLOCK, SMBHSTCMD); + } + else { + return -ENOTSUPP; + } + break; + default: + dev_warn(&adap->dev, "Unsupported transaction %d\n", size); + return -EOPNOTSUPP; + } + + outb_p(NCT6793D_MANUAL_START, SMBHSTCTL); + + while ((size == I2C_SMBUS_BLOCK_DATA) && (len > 0)) { + if (read_write == I2C_SMBUS_WRITE) { + timeout = 0; + while ((inb_p(SMBHSTSTS) & NCT6793D_FIFO_EMPTY) == 0) + { + if(timeout > MAX_RETRIES) + { + return -ETIMEDOUT; + } + usleep_range(250, 500); + timeout++; + } + + //Load more bytes into FIFO + if (len >= 4) { + for (i = cnt; i <= (cnt + 4); i++) { + outb_p(data->block[i], SMBHSTDAT); + } + + len -= 4; + cnt += 4; + } + else { + for (i = cnt; i <= (cnt + len); i++) { + outb_p(data->block[i], SMBHSTDAT); + } + + len = 0; + } + } + else { + return -ENOTSUPP; + } + + } + + //wait for manual mode to complete + timeout = 0; + while ((inb_p(SMBHSTSTS) & NCT6793D_MANUAL_ACTIVE) != 0) + { + if(timeout > MAX_RETRIES) + { + return -ETIMEDOUT; + } + usleep_range(250, 500); + timeout++; + } + + if ((inb_p(SMBHSTERR) & NCT6793D_NO_ACK) != 0) { + return -ENXIO; + } + else if ((read_write == I2C_SMBUS_WRITE) || (size == 
I2C_SMBUS_QUICK)) { + return 0; + } + + switch (size) { + case I2C_SMBUS_QUICK: + case I2C_SMBUS_BYTE_DATA: + data->byte = inb_p(SMBHSTDAT); + break; + case I2C_SMBUS_WORD_DATA: + data->word = inb_p(SMBHSTDAT) + (inb_p(SMBHSTDAT) << 8); + break; + } + return 0; +} + +static u32 nct6775_func(struct i2c_adapter *adapter) +{ + return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE | + I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA | + I2C_FUNC_SMBUS_BLOCK_DATA; +} + +static const struct i2c_algorithm smbus_algorithm = { + .smbus_xfer = nct6775_access, + .functionality = nct6775_func, +}; + +static int nct6775_add_adapter(unsigned short smba, const char *name, struct i2c_adapter **padap) +{ + struct i2c_adapter *adap; + struct i2c_nct6775_adapdata *adapdata; + int retval; + + adap = kzalloc(sizeof(*adap), GFP_KERNEL); + if (adap == NULL) { + return -ENOMEM; + } + + adap->owner = THIS_MODULE; + adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD; + adap->algo = &smbus_algorithm; + + adapdata = kzalloc(sizeof(*adapdata), GFP_KERNEL); + if (adapdata == NULL) { + kfree(adap); + return -ENOMEM; + } + + adapdata->smba = smba; + + snprintf(adap->name, sizeof(adap->name), + "SMBus NCT67xx adapter%s at %04x", name, smba); + + i2c_set_adapdata(adap, adapdata); + + retval = i2c_add_adapter(adap); + if (retval) { + kfree(adapdata); + kfree(adap); + return retval; + } + + *padap = adap; + return 0; +} + +static void nct6775_remove_adapter(struct i2c_adapter *adap) +{ + struct i2c_nct6775_adapdata *adapdata = i2c_get_adapdata(adap); + + if (adapdata->smba) { + i2c_del_adapter(adap); + kfree(adapdata); + kfree(adap); + } +} + +//static SIMPLE_DEV_PM_OPS(nct6775_dev_pm_ops, nct6775_suspend, nct6775_resume); + +/* + * when Super-I/O functions move to a separate file, the Super-I/O + * bus will manage the lifetime of the device and this module will only keep + * track of the nct6775 driver. 
But since we use platform_device_alloc(), we + * must keep track of the device + */ +static struct platform_device *pdev[2]; + +static int nct6775_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct nct6775_sio_data *sio_data = dev_get_platdata(dev); + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_IO, 0); + if (!devm_request_region(&pdev->dev, res->start, IOREGION_LENGTH, + DRVNAME)) + return -EBUSY; + + switch (sio_data->kind) { + case nct6791: + case nct6792: + case nct6793: + case nct6795: + case nct6796: + case nct6798: + nct6775_add_adapter(res->start, "", &nct6775_adapter); + break; + default: + return -ENODEV; + } + + return 0; +} +/* +static void nct6791_enable_io_mapping(int sioaddr) +{ + int val; + + val = superio_inb(sioaddr, NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE); + if (val & 0x10) { + pr_info("Enabling hardware monitor logical device mappings.\n"); + superio_outb(sioaddr, NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE, + val & ~0x10); + } +}*/ + +static struct platform_driver i2c_nct6775_driver = { + .driver = { + .name = DRVNAME, +// .pm = &nct6775_dev_pm_ops, + }, + .probe = nct6775_probe, +}; + +static void __exit i2c_nct6775_exit(void) +{ + int i; + + if(nct6775_adapter) + nct6775_remove_adapter(nct6775_adapter); + + for (i = 0; i < ARRAY_SIZE(pdev); i++) { + if (pdev[i]) + platform_device_unregister(pdev[i]); + } + platform_driver_unregister(&i2c_nct6775_driver); +} + +/* nct6775_find() looks for a '627 in the Super-I/O config space */ +static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data) +{ + u16 val; + int err; + int addr; + + err = superio_enter(sioaddr); + if (err) + return err; + + val = (superio_inb(sioaddr, SIO_REG_DEVID) << 8) | + superio_inb(sioaddr, SIO_REG_DEVID + 1); + + switch (val & SIO_ID_MASK) { + case SIO_NCT6106_ID: + sio_data->kind = nct6106; + break; + case SIO_NCT6775_ID: + sio_data->kind = nct6775; + break; + case SIO_NCT6776_ID: + sio_data->kind = nct6776; + break; + case SIO_NCT6779_ID: + sio_data->kind = nct6779; + break; + case SIO_NCT6791_ID: + sio_data->kind = nct6791; + break; + case SIO_NCT6792_ID: + sio_data->kind = nct6792; + break; + case SIO_NCT6793_ID: + sio_data->kind = nct6793; + break; + case SIO_NCT6795_ID: + sio_data->kind = nct6795; + break; + case SIO_NCT6796_ID: + sio_data->kind = nct6796; + break; + case SIO_NCT6798_ID: + sio_data->kind = nct6798; + break; + default: + if (val != 0xffff) + pr_debug("unsupported chip ID: 0x%04x\n", val); + superio_exit(sioaddr); + return -ENODEV; + } + + /* We have a known chip, find the SMBus I/O address */ + superio_select(sioaddr, NCT6775_LD_SMBUS); + val = (superio_inb(sioaddr, SIO_REG_SMBA) << 8) + | superio_inb(sioaddr, SIO_REG_SMBA + 1); + addr = val & IOREGION_ALIGNMENT; + if (addr == 0) { + pr_err("Refusing to enable a Super-I/O device with a base I/O port 0\n"); + superio_exit(sioaddr); + return -ENODEV; + } + + //if (sio_data->kind == nct6791 || sio_data->kind == nct6792 || + // sio_data->kind == nct6793 || sio_data->kind == nct6795 || + // sio_data->kind == nct6796) + // nct6791_enable_io_mapping(sioaddr); + + superio_exit(sioaddr); + pr_info("Found %s or compatible chip at %#x:%#x\n", + nct6775_sio_names[sio_data->kind], sioaddr, addr); + sio_data->sioreg = sioaddr; + + return addr; +} + +static int __init i2c_nct6775_init(void) +{ + int i, err; + bool found = false; + int address; + struct resource res; + struct nct6775_sio_data sio_data; + int sioaddr[2] = { 0x2e, 0x4e }; + + err = 
platform_driver_register(&i2c_nct6775_driver); + if (err) + return err; + + /* + * initialize sio_data->kind and sio_data->sioreg. + * + * when Super-I/O functions move to a separate file, the Super-I/O + * driver will probe 0x2e and 0x4e and auto-detect the presence of a + * nct6775 hardware monitor, and call probe() + */ + for (i = 0; i < ARRAY_SIZE(pdev); i++) { + address = nct6775_find(sioaddr[i], &sio_data); + if (address <= 0) + continue; + + found = true; + + pdev[i] = platform_device_alloc(DRVNAME, address); + if (!pdev[i]) { + err = -ENOMEM; + goto exit_device_unregister; + } + + err = platform_device_add_data(pdev[i], &sio_data, + sizeof(struct nct6775_sio_data)); + if (err) + goto exit_device_put; + + memset(&res, 0, sizeof(res)); + res.name = DRVNAME; + res.start = address; + res.end = address + IOREGION_LENGTH - 1; + res.flags = IORESOURCE_IO; + + err = acpi_check_resource_conflict(&res); + if (err) { + platform_device_put(pdev[i]); + pdev[i] = NULL; + continue; + } + + err = platform_device_add_resources(pdev[i], &res, 1); + if (err) + goto exit_device_put; + + /* platform_device_add calls probe() */ + err = platform_device_add(pdev[i]); + if (err) + goto exit_device_put; + } + if (!found) { + err = -ENODEV; + goto exit_unregister; + } + + return 0; + +exit_device_put: + platform_device_put(pdev[i]); +exit_device_unregister: + while (--i >= 0) { + if (pdev[i]) + platform_device_unregister(pdev[i]); + } +exit_unregister: + platform_driver_unregister(&i2c_nct6775_driver); + return err; +} + +MODULE_AUTHOR("Adam Honse "); +MODULE_DESCRIPTION("SMBus driver for NCT6775F and compatible chips"); +MODULE_LICENSE("GPL"); + +module_init(i2c_nct6775_init); +module_exit(i2c_nct6775_exit); diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 6a0392172b2f..e7dd007bf6b1 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -568,11 +568,11 @@ static int piix4_transaction(struct i2c_adapter *piix4_adapter) if (srvrworks_csb5_delay) /* Extra delay for SERVERWORKS_CSB5 */ usleep_range(2000, 2100); else - usleep_range(250, 500); + usleep_range(25, 50); while ((++timeout < MAX_TIMEOUT) && ((temp = inb_p(SMBHSTSTS)) & 0x01)) - usleep_range(250, 500); + usleep_range(25, 50); /* If the SMBus is still busy, we give up */ if (timeout == MAX_TIMEOUT) { diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 51e0c4954600..35c3ad741870 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -46,6 +46,7 @@ struct evdev_client { struct fasync_struct *fasync; struct evdev *evdev; struct list_head node; + struct rcu_head rcu; enum input_clock_type clk_type; bool revoked; unsigned long *evmasks[EV_CNT]; @@ -377,13 +378,22 @@ static void evdev_attach_client(struct evdev *evdev, spin_unlock(&evdev->client_lock); } +static void evdev_reclaim_client(struct rcu_head *rp) +{ + struct evdev_client *client = container_of(rp, struct evdev_client, rcu); + unsigned int i; + for (i = 0; i < EV_CNT; ++i) + bitmap_free(client->evmasks[i]); + kvfree(client); +} + static void evdev_detach_client(struct evdev *evdev, struct evdev_client *client) { spin_lock(&evdev->client_lock); list_del_rcu(&client->node); spin_unlock(&evdev->client_lock); - synchronize_rcu(); + call_rcu(&client->rcu, evdev_reclaim_client); } static int evdev_open_device(struct evdev *evdev) @@ -436,7 +446,6 @@ static int evdev_release(struct inode *inode, struct file *file) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; - unsigned int i; 
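The evdev change above replaces synchronize_rcu() in evdev_detach_client() with call_rcu(): readers only walk the client list inside RCU read-side critical sections, so freeing the client and its evmasks is merely deferred to evdev_reclaim_client() after a grace period instead of blocking the release path. A simplified reader-side illustration, assuming the existing client_list field; example_deliver() is a hypothetical helper:

static void example_pass_event(struct evdev *evdev,
			       const struct input_value *v)
{
	struct evdev_client *client;

	rcu_read_lock();
	list_for_each_entry_rcu(client, &evdev->client_list, node)
		example_deliver(client, v);	/* hypothetical helper */
	rcu_read_unlock();
}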
mutex_lock(&evdev->mutex); @@ -448,11 +457,6 @@ static int evdev_release(struct inode *inode, struct file *file) evdev_detach_client(evdev, client); - for (i = 0; i < EV_CNT; ++i) - bitmap_free(client->evmasks[i]); - - kvfree(client); - evdev_close_device(evdev); return 0; @@ -495,7 +499,6 @@ static int evdev_open(struct inode *inode, struct file *file) err_free_client: evdev_detach_client(evdev, client); - kvfree(client); return error; } diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index d721b254e1e4..673b751f625b 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -917,6 +917,13 @@ config LEDS_ACER_A500 This option enables support for the Power Button LED of Acer Iconia Tab A500. +config LEDS_STEAMDECK + tristate "LED support for Steam Deck" + depends on LEDS_CLASS && MFD_STEAMDECK + help + This option enabled support for the status LED (next to the + power button) on Steam Deck + source "drivers/leds/blink/Kconfig" comment "Flash and Torch LED drivers" diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index ce07dc295ff0..79dc8979c2e3 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -80,6 +80,7 @@ obj-$(CONFIG_LEDS_PWM) += leds-pwm.o obj-$(CONFIG_LEDS_REGULATOR) += leds-regulator.o obj-$(CONFIG_LEDS_SC27XX_BLTC) += leds-sc27xx-bltc.o obj-$(CONFIG_LEDS_SUN50I_A100) += leds-sun50i-a100.o +obj-$(CONFIG_LEDS_STEAMDECK) += leds-steamdeck.o obj-$(CONFIG_LEDS_SUNFIRE) += leds-sunfire.o obj-$(CONFIG_LEDS_SYSCON) += leds-syscon.o obj-$(CONFIG_LEDS_TCA6507) += leds-tca6507.o diff --git a/drivers/leds/leds-steamdeck.c b/drivers/leds/leds-steamdeck.c new file mode 100644 index 000000000000..686500b8de73 --- /dev/null +++ b/drivers/leds/leds-steamdeck.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Steam Deck EC MFD LED cell driver + * + * Copyright (C) 2021-2022 Valve Corporation + * + */ + +#include +#include +#include + +struct steamdeck_led { + struct acpi_device *adev; + struct led_classdev cdev; +}; + +static int steamdeck_leds_brightness_set(struct led_classdev *cdev, + enum led_brightness value) +{ + struct steamdeck_led *sd = container_of(cdev, struct steamdeck_led, + cdev); + + if (ACPI_FAILURE(acpi_execute_simple_method(sd->adev->handle, + "CHBV", value))) + return -EIO; + + return 0; +} + +static int steamdeck_leds_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct steamdeck_led *sd; + int ret; + + sd = devm_kzalloc(dev, sizeof(*sd), GFP_KERNEL); + if (!sd) + return -ENOMEM; + + sd->adev = ACPI_COMPANION(dev->parent); + + sd->cdev.name = "status:white"; + sd->cdev.brightness_set_blocking = steamdeck_leds_brightness_set; + sd->cdev.max_brightness = 100; + + ret = devm_led_classdev_register(dev, &sd->cdev); + if (ret) { + dev_err(dev, "Failed to register LEDs device: %d\n", ret); + return ret; + } + + return 0; +} + +static const struct platform_device_id steamdeck_leds_id_table[] = { + { .name = "steamdeck-leds" }, + {} +}; +MODULE_DEVICE_TABLE(platform, steamdeck_leds_id_table); + +static struct platform_driver steamdeck_leds_driver = { + .probe = steamdeck_leds_probe, + .driver = { + .name = "steamdeck-leds", + }, + .id_table = steamdeck_leds_id_table, +}; +module_platform_driver(steamdeck_leds_driver); + +MODULE_AUTHOR("Andrey Smirnov "); +MODULE_DESCRIPTION("Steam Deck LEDs driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 59445763e55a..568f85414c85 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -3271,6 +3271,11 @@ 
static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } +#ifdef CONFIG_CACHY + set_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags); + set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags); +#endif + ret = crypt_ctr_cipher(ti, argv[0], argv[1]); if (ret < 0) goto bad; diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig index 331b8e535e5b..80dabeebf580 100644 --- a/drivers/media/v4l2-core/Kconfig +++ b/drivers/media/v4l2-core/Kconfig @@ -40,6 +40,11 @@ config VIDEO_TUNER config V4L2_JPEG_HELPER tristate +config V4L2_LOOPBACK + tristate "V4L2 loopback device" + help + V4L2 loopback device + # Used by drivers that need v4l2-h264.ko config V4L2_H264 tristate diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile index 2177b9d63a8f..c179507cedc4 100644 --- a/drivers/media/v4l2-core/Makefile +++ b/drivers/media/v4l2-core/Makefile @@ -33,5 +33,7 @@ obj-$(CONFIG_V4L2_JPEG_HELPER) += v4l2-jpeg.o obj-$(CONFIG_V4L2_MEM2MEM_DEV) += v4l2-mem2mem.o obj-$(CONFIG_V4L2_VP9) += v4l2-vp9.o +obj-$(CONFIG_V4L2_LOOPBACK) += v4l2loopback.o + obj-$(CONFIG_VIDEO_TUNER) += tuner.o obj-$(CONFIG_VIDEO_DEV) += v4l2-dv-timings.o videodev.o diff --git a/drivers/media/v4l2-core/v4l2loopback.c b/drivers/media/v4l2-core/v4l2loopback.c new file mode 100644 index 000000000000..25cb1beb26e5 --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback.c @@ -0,0 +1,3184 @@ +/* -*- c-file-style: "linux" -*- */ +/* + * v4l2loopback.c -- video4linux2 loopback driver + * + * Copyright (C) 2005-2009 Vasily Levin (vasaka@gmail.com) + * Copyright (C) 2010-2023 IOhannes m zmoelnig (zmoelnig@iem.at) + * Copyright (C) 2011 Stefan Diewald (stefan.diewald@mytum.de) + * Copyright (C) 2012 Anton Novikov (random.plant@gmail.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "v4l2loopback.h" + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) +#error This module is not supported on kernels before 4.0.0. +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) +#define strscpy strlcpy +#endif + +#if defined(timer_setup) && defined(from_timer) +#define HAVE_TIMER_SETUP +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 7, 0) +#define VFL_TYPE_VIDEO VFL_TYPE_GRABBER +#endif + +#define V4L2LOOPBACK_VERSION_CODE \ + KERNEL_VERSION(V4L2LOOPBACK_VERSION_MAJOR, V4L2LOOPBACK_VERSION_MINOR, \ + V4L2LOOPBACK_VERSION_BUGFIX) + +MODULE_DESCRIPTION("V4L2 loopback video device"); +MODULE_AUTHOR("Vasily Levin, " + "IOhannes m zmoelnig ," + "Stefan Diewald," + "Anton Novikov" + "et al."); +#ifdef SNAPSHOT_VERSION +MODULE_VERSION(__stringify(SNAPSHOT_VERSION)); +#else +MODULE_VERSION("" __stringify(V4L2LOOPBACK_VERSION_MAJOR) "." __stringify( + V4L2LOOPBACK_VERSION_MINOR) "." __stringify(V4L2LOOPBACK_VERSION_BUGFIX)); +#endif +MODULE_LICENSE("GPL"); + +/* + * helpers + */ +#define dprintk(fmt, args...) 
\ + do { \ + if (debug > 0) { \ + printk(KERN_INFO "v4l2-loopback[" __stringify( \ + __LINE__) "], pid(%d): " fmt, \ + task_pid_nr(current), ##args); \ + } \ + } while (0) + +#define MARK() \ + do { \ + if (debug > 1) { \ + printk(KERN_INFO "%s:%d[%s], pid(%d)\n", __FILE__, \ + __LINE__, __func__, task_pid_nr(current)); \ + } \ + } while (0) + +#define dprintkrw(fmt, args...) \ + do { \ + if (debug > 2) { \ + printk(KERN_INFO "v4l2-loopback[" __stringify( \ + __LINE__) "], pid(%d): " fmt, \ + task_pid_nr(current), ##args); \ + } \ + } while (0) + +static inline void v4l2l_get_timestamp(struct v4l2_buffer *b) +{ + struct timespec64 ts; + ktime_get_ts64(&ts); + + b->timestamp.tv_sec = ts.tv_sec; + b->timestamp.tv_usec = (ts.tv_nsec / NSEC_PER_USEC); + b->flags |= V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; +} + +#if BITS_PER_LONG == 32 +#include /* do_div() for 64bit division */ +static inline int v4l2l_mod64(const s64 A, const u32 B) +{ + u64 a = (u64)A; + u32 b = B; + + if (A > 0) + return do_div(a, b); + a = -A; + return -do_div(a, b); +} +#else +static inline int v4l2l_mod64(const s64 A, const u32 B) +{ + return A % B; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) +typedef unsigned __poll_t; +#endif + +/* module constants + * can be overridden during he build process using something like + * make KCPPFLAGS="-DMAX_DEVICES=100" + */ + +/* maximum number of v4l2loopback devices that can be created */ +#ifndef MAX_DEVICES +#define MAX_DEVICES 8 +#endif + +/* whether the default is to announce capabilities exclusively or not */ +#ifndef V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS +#define V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS 0 +#endif + +/* when a producer is considered to have gone stale */ +#ifndef MAX_TIMEOUT +#define MAX_TIMEOUT (100 * 1000) /* in msecs */ +#endif + +/* max buffers that can be mapped, actually they + * are all mapped to max_buffers buffers */ +#ifndef MAX_BUFFERS +#define MAX_BUFFERS 32 +#endif + +/* module parameters */ +static int debug = 0; +module_param(debug, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(debug, "debugging level (higher values == more verbose)"); + +#define V4L2LOOPBACK_DEFAULT_MAX_BUFFERS 2 +static int max_buffers = V4L2LOOPBACK_DEFAULT_MAX_BUFFERS; +module_param(max_buffers, int, S_IRUGO); +MODULE_PARM_DESC(max_buffers, + "how many buffers should be allocated [DEFAULT: " __stringify( + V4L2LOOPBACK_DEFAULT_MAX_BUFFERS) "]"); + +/* how many times a device can be opened + * the per-module default value can be overridden on a per-device basis using + * the /sys/devices interface + * + * note that max_openers should be at least 2 in order to get a working system: + * one opener for the producer and one opener for the consumer + * however, we leave that to the user + */ +#define V4L2LOOPBACK_DEFAULT_MAX_OPENERS 10 +static int max_openers = V4L2LOOPBACK_DEFAULT_MAX_OPENERS; +module_param(max_openers, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC( + max_openers, + "how many users can open the loopback device [DEFAULT: " __stringify( + V4L2LOOPBACK_DEFAULT_MAX_OPENERS) "]"); + +static int devices = -1; +module_param(devices, int, 0); +MODULE_PARM_DESC(devices, "how many devices should be created"); + +static int video_nr[MAX_DEVICES] = { [0 ...(MAX_DEVICES - 1)] = -1 }; +module_param_array(video_nr, int, NULL, 0444); +MODULE_PARM_DESC(video_nr, + "video device numbers (-1=auto, 0=/dev/video0, etc.)"); + +static char *card_label[MAX_DEVICES]; +module_param_array(card_label, charp, NULL, 0000); +MODULE_PARM_DESC(card_label, "card labels for each device"); + +static 
bool exclusive_caps[MAX_DEVICES] = { + [0 ...(MAX_DEVICES - 1)] = V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS +}; +module_param_array(exclusive_caps, bool, NULL, 0444); +/* FIXXME: wording */ +MODULE_PARM_DESC( + exclusive_caps, + "whether to announce OUTPUT/CAPTURE capabilities exclusively or not [DEFAULT: " __stringify( + V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS) "]"); + +/* format specifications */ +#define V4L2LOOPBACK_SIZE_MIN_WIDTH 2 +#define V4L2LOOPBACK_SIZE_MIN_HEIGHT 1 +#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH 8192 +#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT 8192 + +#define V4L2LOOPBACK_SIZE_DEFAULT_WIDTH 640 +#define V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT 480 + +static int max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH; +module_param(max_width, int, S_IRUGO); +MODULE_PARM_DESC(max_width, + "maximum allowed frame width [DEFAULT: " __stringify( + V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH) "]"); +static int max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT; +module_param(max_height, int, S_IRUGO); +MODULE_PARM_DESC(max_height, + "maximum allowed frame height [DEFAULT: " __stringify( + V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT) "]"); + +static DEFINE_IDR(v4l2loopback_index_idr); +static DEFINE_MUTEX(v4l2loopback_ctl_mutex); + +/* frame intervals */ +#define V4L2LOOPBACK_FPS_MIN 0 +#define V4L2LOOPBACK_FPS_MAX 1000 + +/* control IDs */ +#define V4L2LOOPBACK_CID_BASE (V4L2_CID_USER_BASE | 0xf000) +#define CID_KEEP_FORMAT (V4L2LOOPBACK_CID_BASE + 0) +#define CID_SUSTAIN_FRAMERATE (V4L2LOOPBACK_CID_BASE + 1) +#define CID_TIMEOUT (V4L2LOOPBACK_CID_BASE + 2) +#define CID_TIMEOUT_IMAGE_IO (V4L2LOOPBACK_CID_BASE + 3) + +static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl); +static const struct v4l2_ctrl_ops v4l2loopback_ctrl_ops = { + .s_ctrl = v4l2loopback_s_ctrl, +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_keepformat = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_KEEP_FORMAT, + .name = "keep_format", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = 0, + .max = 1, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_sustainframerate = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_SUSTAIN_FRAMERATE, + .name = "sustain_framerate", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = 0, + .max = 1, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeout = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_TIMEOUT, + .name = "timeout", + .type = V4L2_CTRL_TYPE_INTEGER, + .min = 0, + .max = MAX_TIMEOUT, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeoutimageio = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_TIMEOUT_IMAGE_IO, + .name = "timeout_image_io", + .type = V4L2_CTRL_TYPE_BUTTON, + .min = 0, + .max = 1, + .step = 1, + .def = 0, + // clang-format on +}; + +/* module structures */ +struct v4l2loopback_private { + int device_nr; +}; + +/* TODO(vasaka) use typenames which are common to kernel, but first find out if + * it is needed */ +/* struct keeping state and settings of loopback device */ + +struct v4l2l_buffer { + struct v4l2_buffer buffer; + struct list_head list_head; + int use_count; +}; + +struct v4l2_loopback_device { + struct v4l2_device v4l2_dev; + struct v4l2_ctrl_handler ctrl_handler; + struct video_device *vdev; + /* pixel and stream format */ + struct v4l2_pix_format pix_format; + bool pix_format_has_valid_sizeimage; + struct 
v4l2_captureparm capture_param; + unsigned long frame_jiffies; + + /* ctrls */ + int keep_format; /* CID_KEEP_FORMAT; stay ready_for_capture even when all + openers close() the device */ + int sustain_framerate; /* CID_SUSTAIN_FRAMERATE; duplicate frames to maintain + (close to) nominal framerate */ + + /* buffers stuff */ + u8 *image; /* pointer to actual buffers data */ + unsigned long int imagesize; /* size of buffers data */ + int buffers_number; /* should not be big, 4 is a good choice */ + struct v4l2l_buffer buffers[MAX_BUFFERS]; /* inner driver buffers */ + int used_buffers; /* number of the actually used buffers */ + int max_openers; /* how many times can this device be opened */ + + s64 write_position; /* number of last written frame + 1 */ + struct list_head outbufs_list; /* buffers in output DQBUF order */ + int bufpos2index + [MAX_BUFFERS]; /* mapping of (read/write_position % used_buffers) + * to inner buffer index */ + long buffer_size; + + /* sustain_framerate stuff */ + struct timer_list sustain_timer; + unsigned int reread_count; + + /* timeout stuff */ + unsigned long timeout_jiffies; /* CID_TIMEOUT; 0 means disabled */ + int timeout_image_io; /* CID_TIMEOUT_IMAGE_IO; next opener will + * read/write to timeout_image */ + u8 *timeout_image; /* copy of it will be captured when timeout passes */ + struct v4l2l_buffer timeout_image_buffer; + struct timer_list timeout_timer; + int timeout_happened; + + /* sync stuff */ + atomic_t open_count; + + int ready_for_capture; /* set to the number of writers that opened the + * device and negotiated format. */ + int ready_for_output; /* set to true when no writer is currently attached + * this differs slightly from !ready_for_capture, + * e.g. when using fallback images */ + int active_readers; /* increase if any reader starts streaming */ + int announce_all_caps; /* set to false, if device caps (OUTPUT/CAPTURE) + * should only be announced if the resp. 
"ready" + * flag is set; default=TRUE */ + + int min_width, max_width; + int min_height, max_height; + + char card_label[32]; + + wait_queue_head_t read_event; + spinlock_t lock, list_lock; +}; + +/* types of opener shows what opener wants to do with loopback */ +enum opener_type { + // clang-format off + UNNEGOTIATED = 0, + READER = 1, + WRITER = 2, + // clang-format on +}; + +/* struct keeping state and type of opener */ +struct v4l2_loopback_opener { + enum opener_type type; + s64 read_position; /* number of last processed frame + 1 or + * write_position - 1 if reader went out of sync */ + unsigned int reread_count; + struct v4l2_buffer *buffers; + int buffers_number; /* should not be big, 4 is a good choice */ + int timeout_image_io; + + struct v4l2_fh fh; +}; + +#define fh_to_opener(ptr) container_of((ptr), struct v4l2_loopback_opener, fh) + +/* this is heavily inspired by the bttv driver found in the linux kernel */ +struct v4l2l_format { + char *name; + int fourcc; /* video4linux 2 */ + int depth; /* bit/pixel */ + int flags; +}; +/* set the v4l2l_format.flags to PLANAR for non-packed formats */ +#define FORMAT_FLAGS_PLANAR 0x01 +#define FORMAT_FLAGS_COMPRESSED 0x02 + +#include "v4l2loopback_formats.h" + +#ifndef V4L2_TYPE_IS_CAPTURE +#define V4L2_TYPE_IS_CAPTURE(type) \ + ((type) == V4L2_BUF_TYPE_VIDEO_CAPTURE || \ + (type) == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) +#endif /* V4L2_TYPE_IS_CAPTURE */ +#ifndef V4L2_TYPE_IS_OUTPUT +#define V4L2_TYPE_IS_OUTPUT(type) \ + ((type) == V4L2_BUF_TYPE_VIDEO_OUTPUT || \ + (type) == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) +#endif /* V4L2_TYPE_IS_OUTPUT */ + +/* whether the format can be changed */ +/* the format is fixated if we + - have writers (ready_for_capture>0) + - and/or have readers (active_readers>0) +*/ +#define V4L2LOOPBACK_IS_FIXED_FMT(device) \ + (device->ready_for_capture > 0 || device->active_readers > 0 || \ + device->keep_format) + +static const unsigned int FORMATS = ARRAY_SIZE(formats); + +static char *fourcc2str(unsigned int fourcc, char buf[4]) +{ + buf[0] = (fourcc >> 0) & 0xFF; + buf[1] = (fourcc >> 8) & 0xFF; + buf[2] = (fourcc >> 16) & 0xFF; + buf[3] = (fourcc >> 24) & 0xFF; + + return buf; +} + +static const struct v4l2l_format *format_by_fourcc(int fourcc) +{ + unsigned int i; + + for (i = 0; i < FORMATS; i++) { + if (formats[i].fourcc == fourcc) + return formats + i; + } + + dprintk("unsupported format '%c%c%c%c'\n", (fourcc >> 0) & 0xFF, + (fourcc >> 8) & 0xFF, (fourcc >> 16) & 0xFF, + (fourcc >> 24) & 0xFF); + return NULL; +} + +static void pix_format_set_size(struct v4l2_pix_format *f, + const struct v4l2l_format *fmt, + unsigned int width, unsigned int height) +{ + f->width = width; + f->height = height; + + if (fmt->flags & FORMAT_FLAGS_PLANAR) { + f->bytesperline = width; /* Y plane */ + f->sizeimage = (width * height * fmt->depth) >> 3; + } else if (fmt->flags & FORMAT_FLAGS_COMPRESSED) { + /* doesn't make sense for compressed formats */ + f->bytesperline = 0; + f->sizeimage = (width * height * fmt->depth) >> 3; + } else { + f->bytesperline = (width * fmt->depth) >> 3; + f->sizeimage = height * f->bytesperline; + } +} + +static int v4l2l_fill_format(struct v4l2_format *fmt, int capture, + const u32 minwidth, const u32 maxwidth, + const u32 minheight, const u32 maxheight) +{ + u32 width = fmt->fmt.pix.width, height = fmt->fmt.pix.height; + u32 pixelformat = fmt->fmt.pix.pixelformat; + struct v4l2_format fmt0 = *fmt; + u32 bytesperline = 0, sizeimage = 0; + if (!width) + width = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH; + if 
(!height) + height = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT; + if (width < minwidth) + width = minwidth; + if (width > maxwidth) + width = maxwidth; + if (height < minheight) + height = minheight; + if (height > maxheight) + height = maxheight; + + /* sets: width,height,pixelformat,bytesperline,sizeimage */ + if (!(V4L2_TYPE_IS_MULTIPLANAR(fmt0.type))) { + fmt0.fmt.pix.bytesperline = 0; + fmt0.fmt.pix.sizeimage = 0; + } + + if (0) { + ; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) + } else if (!v4l2_fill_pixfmt(&fmt0.fmt.pix, pixelformat, width, + height)) { + ; + } else if (!v4l2_fill_pixfmt_mp(&fmt0.fmt.pix_mp, pixelformat, width, + height)) { + ; +#endif + } else { + const struct v4l2l_format *format = + format_by_fourcc(pixelformat); + if (!format) + return -EINVAL; + pix_format_set_size(&fmt0.fmt.pix, format, width, height); + fmt0.fmt.pix.pixelformat = format->fourcc; + } + + if (V4L2_TYPE_IS_MULTIPLANAR(fmt0.type)) { + *fmt = fmt0; + + if ((fmt->fmt.pix_mp.colorspace == V4L2_COLORSPACE_DEFAULT) || + (fmt->fmt.pix_mp.colorspace > V4L2_COLORSPACE_DCI_P3)) + fmt->fmt.pix_mp.colorspace = V4L2_COLORSPACE_SRGB; + if (V4L2_FIELD_ANY == fmt->fmt.pix_mp.field) + fmt->fmt.pix_mp.field = V4L2_FIELD_NONE; + if (capture) + fmt->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; + else + fmt->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; + } else { + bytesperline = fmt->fmt.pix.bytesperline; + sizeimage = fmt->fmt.pix.sizeimage; + + *fmt = fmt0; + + if (!fmt->fmt.pix.bytesperline) + fmt->fmt.pix.bytesperline = bytesperline; + if (!fmt->fmt.pix.sizeimage) + fmt->fmt.pix.sizeimage = sizeimage; + + if ((fmt->fmt.pix.colorspace == V4L2_COLORSPACE_DEFAULT) || + (fmt->fmt.pix.colorspace > V4L2_COLORSPACE_DCI_P3)) + fmt->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB; + if (V4L2_FIELD_ANY == fmt->fmt.pix.field) + fmt->fmt.pix.field = V4L2_FIELD_NONE; + if (capture) + fmt->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + else + fmt->type = V4L2_BUF_TYPE_VIDEO_OUTPUT; + } + + return 0; +} + +/* Checks if v4l2l_fill_format() has set a valid, fixed sizeimage val. */ +static bool v4l2l_pix_format_has_valid_sizeimage(struct v4l2_format *fmt) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) + const struct v4l2_format_info *info; + + info = v4l2_format_info(fmt->fmt.pix.pixelformat); + if (info && info->mem_planes == 1) + return true; +#endif + + return false; +} + +static int pix_format_eq(const struct v4l2_pix_format *ref, + const struct v4l2_pix_format *tgt, int strict) +{ + /* check if the two formats are equivalent. 
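+ * (Editor's illustration, derived from the macros below: with strict == 0 only
+ * width, height and pixelformat are compared; with strict != 0, bytesperline
+ * and sizeimage must match as well, while field and colorspace are only
+ * compared when the target carries a real value rather than the V4L2
+ * "any/default" one.)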
+ * ANY fields are handled gracefully + */ +#define _pix_format_eq0(x) \ + if (ref->x != tgt->x) \ + result = 0 +#define _pix_format_eq1(x, def) \ + do { \ + if ((def != tgt->x) && (ref->x != tgt->x)) { \ + printk(KERN_INFO #x " failed"); \ + result = 0; \ + } \ + } while (0) + int result = 1; + _pix_format_eq0(width); + _pix_format_eq0(height); + _pix_format_eq0(pixelformat); + if (!strict) + return result; + _pix_format_eq1(field, V4L2_FIELD_ANY); + _pix_format_eq0(bytesperline); + _pix_format_eq0(sizeimage); + _pix_format_eq1(colorspace, V4L2_COLORSPACE_DEFAULT); + return result; +} + +static struct v4l2_loopback_device *v4l2loopback_getdevice(struct file *f); +static int inner_try_setfmt(struct file *file, struct v4l2_format *fmt) +{ + int capture = V4L2_TYPE_IS_CAPTURE(fmt->type); + struct v4l2_loopback_device *dev; + int needschange = 0; + char buf[5]; + buf[4] = 0; + + dev = v4l2loopback_getdevice(file); + + needschange = !(pix_format_eq(&dev->pix_format, &fmt->fmt.pix, 0)); + if (V4L2LOOPBACK_IS_FIXED_FMT(dev)) { + fmt->fmt.pix = dev->pix_format; + if (needschange) { + if (dev->active_readers > 0 && capture) { + /* cannot call fmt_cap while there are readers */ + return -EBUSY; + } + if (dev->ready_for_capture > 0 && !capture) { + /* cannot call fmt_out while there are writers */ + return -EBUSY; + } + } + } + if (v4l2l_fill_format(fmt, capture, dev->min_width, dev->max_width, + dev->min_height, dev->max_height) != 0) { + return -EINVAL; + } + + if (1) { + char buf[5]; + buf[4] = 0; + dprintk("capFOURCC=%s\n", + fourcc2str(dev->pix_format.pixelformat, buf)); + } + return 0; +} + +static int set_timeperframe(struct v4l2_loopback_device *dev, + struct v4l2_fract *tpf) +{ + if ((tpf->denominator < 1) || (tpf->numerator < 1)) { + return -EINVAL; + } + dev->capture_param.timeperframe = *tpf; + dev->frame_jiffies = max(1UL, msecs_to_jiffies(1000) * tpf->numerator / + tpf->denominator); + return 0; +} + +static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd); + +/* device attributes */ +/* available via sysfs: /sys/devices/virtual/video4linux/video* */ + +static ssize_t attr_show_format(struct device *cd, + struct device_attribute *attr, char *buf) +{ + /* gets the current format as "FOURCC:WxH@f/s", e.g. 
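+ * "YUYV:640x480@30" for a device fixed to 640x480 YUYV at 30 fps (editor's
+ * illustration, assuming the usual 16 bit-per-pixel YUYV entry, so
+ * pix_format_set_size() gives bytesperline 1280 and sizeimage 614400), or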
"YUYV:320x240@1000/30" */ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + const struct v4l2_fract *tpf; + char buf4cc[5], buf_fps[32]; + + if (!dev || !V4L2LOOPBACK_IS_FIXED_FMT(dev)) + return 0; + tpf = &dev->capture_param.timeperframe; + + fourcc2str(dev->pix_format.pixelformat, buf4cc); + buf4cc[4] = 0; + if (tpf->numerator == 1) + snprintf(buf_fps, sizeof(buf_fps), "%d", tpf->denominator); + else + snprintf(buf_fps, sizeof(buf_fps), "%d/%d", tpf->denominator, + tpf->numerator); + return sprintf(buf, "%4s:%dx%d@%s\n", buf4cc, dev->pix_format.width, + dev->pix_format.height, buf_fps); +} + +static ssize_t attr_store_format(struct device *cd, + struct device_attribute *attr, const char *buf, + size_t len) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + int fps_num = 0, fps_den = 1; + + if (!dev) + return -ENODEV; + + /* only fps changing is supported */ + if (sscanf(buf, "@%d/%d", &fps_num, &fps_den) > 0) { + struct v4l2_fract f = { .numerator = fps_den, + .denominator = fps_num }; + int err = 0; + if ((err = set_timeperframe(dev, &f)) < 0) + return err; + return len; + } + return -EINVAL; +} + +static DEVICE_ATTR(format, S_IRUGO | S_IWUSR, attr_show_format, + attr_store_format); + +static ssize_t attr_show_buffers(struct device *cd, + struct device_attribute *attr, char *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + + if (!dev) + return -ENODEV; + + return sprintf(buf, "%d\n", dev->used_buffers); +} + +static DEVICE_ATTR(buffers, S_IRUGO, attr_show_buffers, NULL); + +static ssize_t attr_show_maxopeners(struct device *cd, + struct device_attribute *attr, char *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + + if (!dev) + return -ENODEV; + + return sprintf(buf, "%d\n", dev->max_openers); +} + +static ssize_t attr_store_maxopeners(struct device *cd, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct v4l2_loopback_device *dev = NULL; + unsigned long curr = 0; + + if (kstrtoul(buf, 0, &curr)) + return -EINVAL; + + dev = v4l2loopback_cd2dev(cd); + if (!dev) + return -ENODEV; + + if (dev->max_openers == curr) + return len; + + if (curr > __INT_MAX__ || dev->open_count.counter > curr) { + /* request to limit to less openers as are currently attached to us */ + return -EINVAL; + } + + dev->max_openers = (int)curr; + + return len; +} + +static DEVICE_ATTR(max_openers, S_IRUGO | S_IWUSR, attr_show_maxopeners, + attr_store_maxopeners); + +static ssize_t attr_show_state(struct device *cd, struct device_attribute *attr, + char *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + + if (!dev) + return -ENODEV; + + if (dev->ready_for_capture) + return sprintf(buf, "capture\n"); + if (dev->ready_for_output) + return sprintf(buf, "output\n"); + + return -EAGAIN; +} + +static DEVICE_ATTR(state, S_IRUGO, attr_show_state, NULL); + +static void v4l2loopback_remove_sysfs(struct video_device *vdev) +{ +#define V4L2_SYSFS_DESTROY(x) device_remove_file(&vdev->dev, &dev_attr_##x) + + if (vdev) { + V4L2_SYSFS_DESTROY(format); + V4L2_SYSFS_DESTROY(buffers); + V4L2_SYSFS_DESTROY(max_openers); + V4L2_SYSFS_DESTROY(state); + /* ... 
*/ + } +} + +static void v4l2loopback_create_sysfs(struct video_device *vdev) +{ + int res = 0; + +#define V4L2_SYSFS_CREATE(x) \ + res = device_create_file(&vdev->dev, &dev_attr_##x); \ + if (res < 0) \ + break + if (!vdev) + return; + do { + V4L2_SYSFS_CREATE(format); + V4L2_SYSFS_CREATE(buffers); + V4L2_SYSFS_CREATE(max_openers); + V4L2_SYSFS_CREATE(state); + /* ... */ + } while (0); + + if (res >= 0) + return; + dev_err(&vdev->dev, "%s error: %d\n", __func__, res); +} + +/* Event APIs */ + +#define V4L2LOOPBACK_EVENT_BASE (V4L2_EVENT_PRIVATE_START) +#define V4L2LOOPBACK_EVENT_OFFSET 0x08E00000 +#define V4L2_EVENT_PRI_CLIENT_USAGE \ + (V4L2LOOPBACK_EVENT_BASE + V4L2LOOPBACK_EVENT_OFFSET + 1) + +struct v4l2_event_client_usage { + __u32 count; +}; + +/* global module data */ +/* find a device based on it's device-number (e.g. '3' for /dev/video3) */ +struct v4l2loopback_lookup_cb_data { + int device_nr; + struct v4l2_loopback_device *device; +}; +static int v4l2loopback_lookup_cb(int id, void *ptr, void *data) +{ + struct v4l2_loopback_device *device = ptr; + struct v4l2loopback_lookup_cb_data *cbdata = data; + if (cbdata && device && device->vdev) { + if (device->vdev->num == cbdata->device_nr) { + cbdata->device = device; + cbdata->device_nr = id; + return 1; + } + } + return 0; +} +static int v4l2loopback_lookup(int device_nr, + struct v4l2_loopback_device **device) +{ + struct v4l2loopback_lookup_cb_data data = { + .device_nr = device_nr, + .device = NULL, + }; + int err = idr_for_each(&v4l2loopback_index_idr, &v4l2loopback_lookup_cb, + &data); + if (1 == err) { + if (device) + *device = data.device; + return data.device_nr; + } + return -ENODEV; +} +static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd) +{ + struct video_device *loopdev = to_video_device(cd); + struct v4l2loopback_private *ptr = + (struct v4l2loopback_private *)video_get_drvdata(loopdev); + int nr = ptr->device_nr; + + return idr_find(&v4l2loopback_index_idr, nr); +} + +static struct v4l2_loopback_device *v4l2loopback_getdevice(struct file *f) +{ + struct v4l2loopback_private *ptr = video_drvdata(f); + int nr = ptr->device_nr; + + return idr_find(&v4l2loopback_index_idr, nr); +} + +/* forward declarations */ +static void client_usage_queue_event(struct video_device *vdev); +static void init_buffers(struct v4l2_loopback_device *dev); +static int allocate_buffers(struct v4l2_loopback_device *dev); +static void free_buffers(struct v4l2_loopback_device *dev); +static void try_free_buffers(struct v4l2_loopback_device *dev); +static int allocate_timeout_image(struct v4l2_loopback_device *dev); +static void check_timers(struct v4l2_loopback_device *dev); +static const struct v4l2_file_operations v4l2_loopback_fops; +static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops; + +/* Queue helpers */ +/* next functions sets buffer flags and adjusts counters accordingly */ +static inline void set_done(struct v4l2l_buffer *buffer) +{ + buffer->buffer.flags &= ~V4L2_BUF_FLAG_QUEUED; + buffer->buffer.flags |= V4L2_BUF_FLAG_DONE; +} + +static inline void set_queued(struct v4l2l_buffer *buffer) +{ + buffer->buffer.flags &= ~V4L2_BUF_FLAG_DONE; + buffer->buffer.flags |= V4L2_BUF_FLAG_QUEUED; +} + +static inline void unset_flags(struct v4l2l_buffer *buffer) +{ + buffer->buffer.flags &= ~V4L2_BUF_FLAG_QUEUED; + buffer->buffer.flags &= ~V4L2_BUF_FLAG_DONE; +} + +/* V4L2 ioctl caps and params calls */ +/* returns device capabilities + * called on VIDIOC_QUERYCAP + */ +static int vidioc_querycap(struct file *file, void 
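+/*
+ * Editor's note (illustrative, not part of the original patch): the sysfs
+ * attributes registered above can be exercised from a shell, e.g. reading
+ * /sys/devices/virtual/video4linux/videoN/format prints "FOURCC:WxH@fps" once
+ * a format is fixed, and writing "@30/1" to the same file requests 30 fps via
+ * set_timeperframe(); only the frame rate can be changed this way (see
+ * attr_store_format()).
+ */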
*priv, + struct v4l2_capability *cap) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + int device_nr = + ((struct v4l2loopback_private *)video_get_drvdata(dev->vdev)) + ->device_nr; + __u32 capabilities = V4L2_CAP_STREAMING | V4L2_CAP_READWRITE; + + strscpy(cap->driver, "v4l2 loopback", sizeof(cap->driver)); + snprintf(cap->card, sizeof(cap->card), "%s", dev->card_label); + snprintf(cap->bus_info, sizeof(cap->bus_info), + "platform:v4l2loopback-%03d", device_nr); + + if (dev->announce_all_caps) { + capabilities |= V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT; + } else { + if (dev->ready_for_capture) { + capabilities |= V4L2_CAP_VIDEO_CAPTURE; + } + if (dev->ready_for_output) { + capabilities |= V4L2_CAP_VIDEO_OUTPUT; + } + } + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) + dev->vdev->device_caps = +#endif /* >=linux-4.7.0 */ + cap->device_caps = cap->capabilities = capabilities; + + cap->capabilities |= V4L2_CAP_DEVICE_CAPS; + + memset(cap->reserved, 0, sizeof(cap->reserved)); + return 0; +} + +static int vidioc_enum_framesizes(struct file *file, void *fh, + struct v4l2_frmsizeenum *argp) +{ + struct v4l2_loopback_device *dev; + + /* there can be only one... */ + if (argp->index) + return -EINVAL; + + dev = v4l2loopback_getdevice(file); + if (V4L2LOOPBACK_IS_FIXED_FMT(dev)) { + /* format has already been negotiated + * cannot change during runtime + */ + if (argp->pixel_format != dev->pix_format.pixelformat) + return -EINVAL; + + argp->type = V4L2_FRMSIZE_TYPE_DISCRETE; + + argp->discrete.width = dev->pix_format.width; + argp->discrete.height = dev->pix_format.height; + } else { + /* if the format has not been negotiated yet, we accept anything + */ + if (NULL == format_by_fourcc(argp->pixel_format)) + return -EINVAL; + + if (dev->min_width == dev->max_width && + dev->min_height == dev->max_height) { + argp->type = V4L2_FRMSIZE_TYPE_DISCRETE; + + argp->discrete.width = dev->min_width; + argp->discrete.height = dev->min_height; + } else { + argp->type = V4L2_FRMSIZE_TYPE_CONTINUOUS; + + argp->stepwise.min_width = dev->min_width; + argp->stepwise.min_height = dev->min_height; + + argp->stepwise.max_width = dev->max_width; + argp->stepwise.max_height = dev->max_height; + + argp->stepwise.step_width = 1; + argp->stepwise.step_height = 1; + } + } + return 0; +} + +/* returns frameinterval (fps) for the set resolution + * called on VIDIOC_ENUM_FRAMEINTERVALS + */ +static int vidioc_enum_frameintervals(struct file *file, void *fh, + struct v4l2_frmivalenum *argp) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + + /* there can be only one... 
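+ * (Editor's note, illustrative: while the format is still negotiable, this and
+ * the frame-size enumeration above report a CONTINUOUS range - sizes from
+ * min_width x min_height up to max_width x max_height in steps of 1 (a single
+ * DISCRETE size if min and max coincide), and intervals between
+ * 1/V4L2LOOPBACK_FPS_MAX and 1/V4L2LOOPBACK_FPS_MIN; once a writer has fixed
+ * the format they collapse to the single negotiated size and timeperframe.)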
*/ + if (argp->index) + return -EINVAL; + + if (V4L2LOOPBACK_IS_FIXED_FMT(dev)) { + if (argp->width != dev->pix_format.width || + argp->height != dev->pix_format.height || + argp->pixel_format != dev->pix_format.pixelformat) + return -EINVAL; + + argp->type = V4L2_FRMIVAL_TYPE_DISCRETE; + argp->discrete = dev->capture_param.timeperframe; + } else { + if (argp->width < dev->min_width || + argp->width > dev->max_width || + argp->height < dev->min_height || + argp->height > dev->max_height || + NULL == format_by_fourcc(argp->pixel_format)) + return -EINVAL; + + argp->type = V4L2_FRMIVAL_TYPE_CONTINUOUS; + argp->stepwise.min.numerator = 1; + argp->stepwise.min.denominator = V4L2LOOPBACK_FPS_MAX; + argp->stepwise.max.numerator = 1; + argp->stepwise.max.denominator = V4L2LOOPBACK_FPS_MIN; + argp->stepwise.step.numerator = 1; + argp->stepwise.step.denominator = 1; + } + + return 0; +} + +/* ------------------ CAPTURE ----------------------- */ + +/* returns device formats + * called on VIDIOC_ENUM_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE + */ +static int vidioc_enum_fmt_cap(struct file *file, void *fh, + struct v4l2_fmtdesc *f) +{ + struct v4l2_loopback_device *dev; + const struct v4l2l_format *fmt; + MARK(); + + dev = v4l2loopback_getdevice(file); + + if (f->index) + return -EINVAL; + + if (V4L2LOOPBACK_IS_FIXED_FMT(dev)) { + /* format has been fixed, so only one single format is supported */ + const __u32 format = dev->pix_format.pixelformat; + + if ((fmt = format_by_fourcc(format))) { + snprintf(f->description, sizeof(f->description), "%s", + fmt->name); + } else { + snprintf(f->description, sizeof(f->description), + "[%c%c%c%c]", (format >> 0) & 0xFF, + (format >> 8) & 0xFF, (format >> 16) & 0xFF, + (format >> 24) & 0xFF); + } + + f->pixelformat = dev->pix_format.pixelformat; + } else { + return -EINVAL; + } + f->flags = 0; + MARK(); + return 0; +} + +/* returns current video format + * called on VIDIOC_G_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE + */ +static int vidioc_g_fmt_cap(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + struct v4l2_loopback_device *dev; + MARK(); + + dev = v4l2loopback_getdevice(file); + if (!dev->ready_for_capture && !dev->ready_for_output) + return -EINVAL; + + fmt->fmt.pix = dev->pix_format; + MARK(); + return 0; +} + +/* checks if it is OK to change to format fmt; + * actual check is done by inner_try_setfmt + * just checking that pixelformat is OK and set other parameters, app should + * obey this decision + * called on VIDIOC_TRY_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE + */ +static int vidioc_try_fmt_cap(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + int ret = 0; + if (!V4L2_TYPE_IS_CAPTURE(fmt->type)) + return -EINVAL; + ret = inner_try_setfmt(file, fmt); + if (-EBUSY == ret) + return 0; + return ret; +} + +/* sets new output format, if possible + * actually format is set by input and we even do not check it, just return + * current one, but it is possible to set subregions of input TODO(vasaka) + * called on VIDIOC_S_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE + */ +static int vidioc_s_fmt_cap(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + int ret; + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + if (!V4L2_TYPE_IS_CAPTURE(fmt->type)) + return -EINVAL; + ret = inner_try_setfmt(file, fmt); + if (!ret) { + dev->pix_format = fmt->fmt.pix; + } + return ret; +} + +/* ------------------ OUTPUT ----------------------- */ + +/* returns 
device formats; + * LATER: allow all formats + * called on VIDIOC_ENUM_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT + */ +static int vidioc_enum_fmt_out(struct file *file, void *fh, + struct v4l2_fmtdesc *f) +{ + struct v4l2_loopback_device *dev; + const struct v4l2l_format *fmt; + + dev = v4l2loopback_getdevice(file); + + if (V4L2LOOPBACK_IS_FIXED_FMT(dev)) { + /* format has been fixed, so only one single format is supported */ + const __u32 format = dev->pix_format.pixelformat; + + if (f->index) + return -EINVAL; + + if ((fmt = format_by_fourcc(format))) { + snprintf(f->description, sizeof(f->description), "%s", + fmt->name); + } else { + snprintf(f->description, sizeof(f->description), + "[%c%c%c%c]", (format >> 0) & 0xFF, + (format >> 8) & 0xFF, (format >> 16) & 0xFF, + (format >> 24) & 0xFF); + } + + f->pixelformat = dev->pix_format.pixelformat; + } else { + /* fill in a dummy format */ + /* coverity[unsigned_compare] */ + if (f->index < 0 || f->index >= FORMATS) + return -EINVAL; + + fmt = &formats[f->index]; + + f->pixelformat = fmt->fourcc; + snprintf(f->description, sizeof(f->description), "%s", + fmt->name); + } + f->flags = 0; + + return 0; +} + +/* returns current video format format fmt */ +/* NOTE: this is called from the producer + * so if format has not been negotiated yet, + * it should return ALL of available formats, + * called on VIDIOC_G_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT + */ +static int vidioc_g_fmt_out(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + struct v4l2_loopback_device *dev; + MARK(); + + dev = v4l2loopback_getdevice(file); + + /* + * LATER: this should return the currently valid format + * gstreamer doesn't like it, if this returns -EINVAL, as it + * then concludes that there is _no_ valid format + * CHECK whether this assumption is wrong, + * or whether we have to always provide a valid format + */ + + fmt->fmt.pix = dev->pix_format; + return 0; +} + +/* checks if it is OK to change to format fmt; + * if format is negotiated do not change it + * called on VIDIOC_TRY_FMT with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT + */ +static int vidioc_try_fmt_out(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + int ret = 0; + if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) + return -EINVAL; + ret = inner_try_setfmt(file, fmt); + if (-EBUSY == ret) + return 0; + return ret; +} + +/* sets new output format, if possible; + * allocate data here because we do not know if it will be streaming or + * read/write IO + * called on VIDIOC_S_FMT with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT + */ +static int vidioc_s_fmt_out(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + struct v4l2_loopback_device *dev; + int ret; + char buf[5]; + buf[4] = 0; + if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) + return -EINVAL; + dev = v4l2loopback_getdevice(file); + + ret = inner_try_setfmt(file, fmt); + if (!ret) { + dev->pix_format = fmt->fmt.pix; + dev->pix_format_has_valid_sizeimage = + v4l2l_pix_format_has_valid_sizeimage(fmt); + dprintk("s_fmt_out(%d) %d...%d\n", ret, dev->ready_for_capture, + dev->pix_format.sizeimage); + dprintk("outFOURCC=%s\n", + fourcc2str(dev->pix_format.pixelformat, buf)); + + if (!dev->ready_for_capture) { + dev->buffer_size = + PAGE_ALIGN(dev->pix_format.sizeimage); + // JMZ: TODO get rid of the next line + fmt->fmt.pix.sizeimage = dev->buffer_size; + ret = allocate_buffers(dev); + } + } + return ret; +} + +// #define V4L2L_OVERLAY +#ifdef V4L2L_OVERLAY +/* ------------------ OVERLAY 
----------------------- */ +/* currently unsupported */ +/* GSTreamer's v4l2sink is buggy, as it requires the overlay to work + * while it should only require it, if overlay is requested + * once the gstreamer element is fixed, remove the overlay dummies + */ +#warning OVERLAY dummies +static int vidioc_g_fmt_overlay(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + return 0; +} + +static int vidioc_s_fmt_overlay(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + return 0; +} +#endif /* V4L2L_OVERLAY */ + +/* ------------------ PARAMs ----------------------- */ + +/* get some data flow parameters, only capability, fps and readbuffers has + * effect on this driver + * called on VIDIOC_G_PARM + */ +static int vidioc_g_parm(struct file *file, void *priv, + struct v4l2_streamparm *parm) +{ + /* do not care about type of opener, hope these enums would always be + * compatible */ + struct v4l2_loopback_device *dev; + MARK(); + + dev = v4l2loopback_getdevice(file); + parm->parm.capture = dev->capture_param; + return 0; +} + +/* get some data flow parameters, only capability, fps and readbuffers has + * effect on this driver + * called on VIDIOC_S_PARM + */ +static int vidioc_s_parm(struct file *file, void *priv, + struct v4l2_streamparm *parm) +{ + struct v4l2_loopback_device *dev; + int err = 0; + MARK(); + + dev = v4l2loopback_getdevice(file); + dprintk("vidioc_s_parm called frate=%d/%d\n", + parm->parm.capture.timeperframe.numerator, + parm->parm.capture.timeperframe.denominator); + + switch (parm->type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + if ((err = set_timeperframe( + dev, &parm->parm.capture.timeperframe)) < 0) + return err; + break; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + if ((err = set_timeperframe( + dev, &parm->parm.capture.timeperframe)) < 0) + return err; + break; + default: + return -1; + } + + parm->parm.capture = dev->capture_param; + return 0; +} + +#ifdef V4L2LOOPBACK_WITH_STD +/* sets a tv standard, actually we do not need to handle this any special way + * added to support effecttv + * called on VIDIOC_S_STD + */ +static int vidioc_s_std(struct file *file, void *fh, v4l2_std_id *_std) +{ + v4l2_std_id req_std = 0, supported_std = 0; + const v4l2_std_id all_std = V4L2_STD_ALL, no_std = 0; + + if (_std) { + req_std = *_std; + *_std = all_std; + } + + /* we support everything in V4L2_STD_ALL, but not more... 
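+ *
+ * (Editor's note on the PARM handlers above, illustrative: both the CAPTURE
+ * and OUTPUT cases of vidioc_s_parm() end up in set_timeperframe(), which
+ * stores the fraction and derives
+ *	frame_jiffies = max(1, msecs_to_jiffies(1000) * numerator / denominator);
+ * a 1/30 timeperframe therefore yields roughly HZ/30 jiffies, the period the
+ * sustain-framerate timer later re-arms with.)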
*/ + supported_std = (all_std & req_std); + if (no_std == supported_std) + return -EINVAL; + + return 0; +} + +/* gets a fake video standard + * called on VIDIOC_G_STD + */ +static int vidioc_g_std(struct file *file, void *fh, v4l2_std_id *norm) +{ + if (norm) + *norm = V4L2_STD_ALL; + return 0; +} +/* gets a fake video standard + * called on VIDIOC_QUERYSTD + */ +static int vidioc_querystd(struct file *file, void *fh, v4l2_std_id *norm) +{ + if (norm) + *norm = V4L2_STD_ALL; + return 0; +} +#endif /* V4L2LOOPBACK_WITH_STD */ + +static int v4l2loopback_set_ctrl(struct v4l2_loopback_device *dev, u32 id, + s64 val) +{ + switch (id) { + case CID_KEEP_FORMAT: + if (val < 0 || val > 1) + return -EINVAL; + dev->keep_format = val; + try_free_buffers( + dev); /* will only free buffers if !keep_format */ + break; + case CID_SUSTAIN_FRAMERATE: + if (val < 0 || val > 1) + return -EINVAL; + spin_lock_bh(&dev->lock); + dev->sustain_framerate = val; + check_timers(dev); + spin_unlock_bh(&dev->lock); + break; + case CID_TIMEOUT: + if (val < 0 || val > MAX_TIMEOUT) + return -EINVAL; + spin_lock_bh(&dev->lock); + dev->timeout_jiffies = msecs_to_jiffies(val); + check_timers(dev); + spin_unlock_bh(&dev->lock); + allocate_timeout_image(dev); + break; + case CID_TIMEOUT_IMAGE_IO: + dev->timeout_image_io = 1; + break; + default: + return -EINVAL; + } + return 0; +} + +static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl) +{ + struct v4l2_loopback_device *dev = container_of( + ctrl->handler, struct v4l2_loopback_device, ctrl_handler); + return v4l2loopback_set_ctrl(dev, ctrl->id, ctrl->val); +} + +/* returns set of device outputs, in our case there is only one + * called on VIDIOC_ENUMOUTPUT + */ +static int vidioc_enum_output(struct file *file, void *fh, + struct v4l2_output *outp) +{ + __u32 index = outp->index; + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + MARK(); + + if (!dev->announce_all_caps && !dev->ready_for_output) + return -ENOTTY; + + if (0 != index) + return -EINVAL; + + /* clear all data (including the reserved fields) */ + memset(outp, 0, sizeof(*outp)); + + outp->index = index; + strscpy(outp->name, "loopback in", sizeof(outp->name)); + outp->type = V4L2_OUTPUT_TYPE_ANALOG; + outp->audioset = 0; + outp->modulator = 0; +#ifdef V4L2LOOPBACK_WITH_STD + outp->std = V4L2_STD_ALL; +#ifdef V4L2_OUT_CAP_STD + outp->capabilities |= V4L2_OUT_CAP_STD; +#endif /* V4L2_OUT_CAP_STD */ +#endif /* V4L2LOOPBACK_WITH_STD */ + + return 0; +} + +/* which output is currently active, + * called on VIDIOC_G_OUTPUT + */ +static int vidioc_g_output(struct file *file, void *fh, unsigned int *i) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + if (!dev->announce_all_caps && !dev->ready_for_output) + return -ENOTTY; + if (i) + *i = 0; + return 0; +} + +/* set output, can make sense if we have more than one video src, + * called on VIDIOC_S_OUTPUT + */ +static int vidioc_s_output(struct file *file, void *fh, unsigned int i) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + if (!dev->announce_all_caps && !dev->ready_for_output) + return -ENOTTY; + + if (i) + return -EINVAL; + + return 0; +} + +/* returns set of device inputs, in our case there is only one, + * but later I may add more + * called on VIDIOC_ENUMINPUT + */ +static int vidioc_enum_input(struct file *file, void *fh, + struct v4l2_input *inp) +{ + struct v4l2_loopback_device *dev; + __u32 index = inp->index; + MARK(); + + if (0 != index) + return -EINVAL; + + /* clear all data (including the 
reserved fields) */ + memset(inp, 0, sizeof(*inp)); + + inp->index = index; + strscpy(inp->name, "loopback", sizeof(inp->name)); + inp->type = V4L2_INPUT_TYPE_CAMERA; + inp->audioset = 0; + inp->tuner = 0; + inp->status = 0; + +#ifdef V4L2LOOPBACK_WITH_STD + inp->std = V4L2_STD_ALL; +#ifdef V4L2_IN_CAP_STD + inp->capabilities |= V4L2_IN_CAP_STD; +#endif +#endif /* V4L2LOOPBACK_WITH_STD */ + + dev = v4l2loopback_getdevice(file); + if (!dev->ready_for_capture) { + inp->status |= V4L2_IN_ST_NO_SIGNAL; + } + + return 0; +} + +/* which input is currently active, + * called on VIDIOC_G_INPUT + */ +static int vidioc_g_input(struct file *file, void *fh, unsigned int *i) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + if (!dev->announce_all_caps && !dev->ready_for_capture) + return -ENOTTY; + if (i) + *i = 0; + return 0; +} + +/* set input, can make sense if we have more than one video src, + * called on VIDIOC_S_INPUT + */ +static int vidioc_s_input(struct file *file, void *fh, unsigned int i) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + if (!dev->announce_all_caps && !dev->ready_for_capture) + return -ENOTTY; + if (i == 0) + return 0; + return -EINVAL; +} + +/* --------------- V4L2 ioctl buffer related calls ----------------- */ + +/* negotiate buffer type + * only mmap streaming supported + * called on VIDIOC_REQBUFS + */ +static int vidioc_reqbufs(struct file *file, void *fh, + struct v4l2_requestbuffers *b) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + int i; + MARK(); + + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(fh); + + dprintk("reqbufs: %d\t%d=%d\n", b->memory, b->count, + dev->buffers_number); + + if (opener->timeout_image_io) { + dev->timeout_image_io = 0; + if (b->memory != V4L2_MEMORY_MMAP) + return -EINVAL; + b->count = 2; + return 0; + } + + if (V4L2_TYPE_IS_OUTPUT(b->type) && (!dev->ready_for_output)) { + return -EBUSY; + } + + init_buffers(dev); + switch (b->memory) { + case V4L2_MEMORY_MMAP: + /* do nothing here, buffers are always allocated */ + if (b->count < 1 || dev->buffers_number < 1) + return 0; + + if (b->count > dev->buffers_number) + b->count = dev->buffers_number; + + /* make sure that outbufs_list contains buffers from 0 to used_buffers-1 + * actually, it will have been already populated via v4l2_loopback_init() + * at this point */ + if (list_empty(&dev->outbufs_list)) { + for (i = 0; i < dev->used_buffers; ++i) + list_add_tail(&dev->buffers[i].list_head, + &dev->outbufs_list); + } + + /* also, if dev->used_buffers is going to be decreased, we should remove + * out-of-range buffers from outbufs_list, and fix bufpos2index mapping */ + if (b->count < dev->used_buffers) { + struct v4l2l_buffer *pos, *n; + + list_for_each_entry_safe(pos, n, &dev->outbufs_list, + list_head) { + if (pos->buffer.index >= b->count) + list_del(&pos->list_head); + } + + /* after we update dev->used_buffers, buffers in outbufs_list will + * correspond to dev->write_position + [0;b->count-1] range */ + i = v4l2l_mod64(dev->write_position, b->count); + list_for_each_entry(pos, &dev->outbufs_list, + list_head) { + dev->bufpos2index[i % b->count] = + pos->buffer.index; + ++i; + } + } + + opener->buffers_number = b->count; + if (opener->buffers_number < dev->used_buffers) + dev->used_buffers = opener->buffers_number; + return 0; + default: + return -EINVAL; + } +} + +/* returns buffer asked for; + * give app as many buffers as it wants, if it less than MAX, + * but map them in our inner buffers + * 
called on VIDIOC_QUERYBUF + */ +static int vidioc_querybuf(struct file *file, void *fh, struct v4l2_buffer *b) +{ + enum v4l2_buf_type type; + int index; + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + + MARK(); + + type = b->type; + index = b->index; + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(fh); + + if ((b->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) && + (b->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)) { + return -EINVAL; + } + if (b->index > max_buffers) + return -EINVAL; + + if (opener->timeout_image_io) + *b = dev->timeout_image_buffer.buffer; + else + *b = dev->buffers[b->index % dev->used_buffers].buffer; + + b->type = type; + b->index = index; + dprintkrw("buffer type: %d (of %d with size=%ld)\n", b->memory, + dev->buffers_number, dev->buffer_size); + + /* Hopefully fix 'DQBUF return bad index if queue bigger then 2 for capture' + https://github.com/umlaeute/v4l2loopback/issues/60 */ + b->flags &= ~V4L2_BUF_FLAG_DONE; + b->flags |= V4L2_BUF_FLAG_QUEUED; + + return 0; +} + +static void buffer_written(struct v4l2_loopback_device *dev, + struct v4l2l_buffer *buf) +{ + del_timer_sync(&dev->sustain_timer); + del_timer_sync(&dev->timeout_timer); + + spin_lock_bh(&dev->list_lock); + list_move_tail(&buf->list_head, &dev->outbufs_list); + spin_unlock_bh(&dev->list_lock); + + spin_lock_bh(&dev->lock); + dev->bufpos2index[v4l2l_mod64(dev->write_position, dev->used_buffers)] = + buf->buffer.index; + ++dev->write_position; + dev->reread_count = 0; + + check_timers(dev); + spin_unlock_bh(&dev->lock); +} + +/* put buffer to queue + * called on VIDIOC_QBUF + */ +static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + struct v4l2l_buffer *b; + int index; + + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(fh); + + if (buf->index > max_buffers) + return -EINVAL; + if (opener->timeout_image_io) + return 0; + + index = buf->index % dev->used_buffers; + b = &dev->buffers[index]; + + switch (buf->type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + dprintkrw( + "qbuf(CAPTURE)#%d: buffer#%d @ %p type=%d bytesused=%d length=%d flags=%x field=%d timestamp=%lld.%06ld sequence=%d\n", + index, buf->index, buf, buf->type, buf->bytesused, + buf->length, buf->flags, buf->field, + (long long)buf->timestamp.tv_sec, + (long int)buf->timestamp.tv_usec, buf->sequence); + set_queued(b); + return 0; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + dprintkrw( + "qbuf(OUTPUT)#%d: buffer#%d @ %p type=%d bytesused=%d length=%d flags=%x field=%d timestamp=%lld.%06ld sequence=%d\n", + index, buf->index, buf, buf->type, buf->bytesused, + buf->length, buf->flags, buf->field, + (long long)buf->timestamp.tv_sec, + (long int)buf->timestamp.tv_usec, buf->sequence); + if ((!(b->buffer.flags & V4L2_BUF_FLAG_TIMESTAMP_COPY)) && + (buf->timestamp.tv_sec == 0 && buf->timestamp.tv_usec == 0)) + v4l2l_get_timestamp(&b->buffer); + else { + b->buffer.timestamp = buf->timestamp; + b->buffer.flags |= V4L2_BUF_FLAG_TIMESTAMP_COPY; + } + if (dev->pix_format_has_valid_sizeimage) { + if (buf->bytesused >= dev->pix_format.sizeimage) { + b->buffer.bytesused = dev->pix_format.sizeimage; + } else { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) + dev_warn_ratelimited( + &dev->vdev->dev, +#else + dprintkrw( +#endif + "warning queued output buffer bytesused too small %d < %d\n", + buf->bytesused, + dev->pix_format.sizeimage); + b->buffer.bytesused = buf->bytesused; + } + } else { + b->buffer.bytesused = buf->bytesused; + 
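+		/*
+		 * Editor's illustration (not from the original source): once
+		 * buffer_written() below runs, the ring bookkeeping is, e.g.
+		 * with used_buffers == 4,
+		 *	bufpos2index[write_position % 4] = buf->index;
+		 *	write_position++;
+		 * and a reader later resolves its own read_position through the
+		 * same table in get_capture_buffer().
+		 */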
} + + set_done(b); + buffer_written(dev, b); + + /* Hopefully fix 'DQBUF return bad index if queue bigger then 2 for capture' + https://github.com/umlaeute/v4l2loopback/issues/60 */ + buf->flags &= ~V4L2_BUF_FLAG_DONE; + buf->flags |= V4L2_BUF_FLAG_QUEUED; + + wake_up_all(&dev->read_event); + return 0; + default: + return -EINVAL; + } +} + +static int can_read(struct v4l2_loopback_device *dev, + struct v4l2_loopback_opener *opener) +{ + int ret; + + spin_lock_bh(&dev->lock); + check_timers(dev); + ret = dev->write_position > opener->read_position || + dev->reread_count > opener->reread_count || dev->timeout_happened; + spin_unlock_bh(&dev->lock); + return ret; +} + +static int get_capture_buffer(struct file *file) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data); + int pos, ret; + int timeout_happened; + + if ((file->f_flags & O_NONBLOCK) && + (dev->write_position <= opener->read_position && + dev->reread_count <= opener->reread_count && + !dev->timeout_happened)) + return -EAGAIN; + wait_event_interruptible(dev->read_event, can_read(dev, opener)); + + spin_lock_bh(&dev->lock); + if (dev->write_position == opener->read_position) { + if (dev->reread_count > opener->reread_count + 2) + opener->reread_count = dev->reread_count - 1; + ++opener->reread_count; + pos = v4l2l_mod64(opener->read_position + dev->used_buffers - 1, + dev->used_buffers); + } else { + opener->reread_count = 0; + if (dev->write_position > + opener->read_position + dev->used_buffers) + opener->read_position = dev->write_position - 1; + pos = v4l2l_mod64(opener->read_position, dev->used_buffers); + ++opener->read_position; + } + timeout_happened = dev->timeout_happened; + dev->timeout_happened = 0; + spin_unlock_bh(&dev->lock); + + ret = dev->bufpos2index[pos]; + if (timeout_happened) { + if (ret < 0) { + dprintk("trying to return not mapped buf[%d]\n", ret); + return -EFAULT; + } + /* although allocated on-demand, timeout_image is freed only + * in free_buffers(), so we don't need to worry about it being + * deallocated suddenly */ + memcpy(dev->image + dev->buffers[ret].buffer.m.offset, + dev->timeout_image, dev->buffer_size); + } + return ret; +} + +/* put buffer to dequeue + * called on VIDIOC_DQBUF + */ +static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + int index; + struct v4l2l_buffer *b; + + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(fh); + if (opener->timeout_image_io) { + *buf = dev->timeout_image_buffer.buffer; + return 0; + } + + switch (buf->type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + index = get_capture_buffer(file); + if (index < 0) + return index; + dprintkrw("capture DQBUF pos: %lld index: %d\n", + (long long)(opener->read_position - 1), index); + if (!(dev->buffers[index].buffer.flags & + V4L2_BUF_FLAG_MAPPED)) { + dprintk("trying to return not mapped buf[%d]\n", index); + return -EINVAL; + } + unset_flags(&dev->buffers[index]); + *buf = dev->buffers[index].buffer; + dprintkrw( + "dqbuf(CAPTURE)#%d: buffer#%d @ %p type=%d bytesused=%d length=%d flags=%x field=%d timestamp=%lld.%06ld sequence=%d\n", + index, buf->index, buf, buf->type, buf->bytesused, + buf->length, buf->flags, buf->field, + (long long)buf->timestamp.tv_sec, + (long int)buf->timestamp.tv_usec, buf->sequence); + return 0; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + spin_lock_bh(&dev->list_lock); + + b = 
list_entry(dev->outbufs_list.prev, struct v4l2l_buffer, + list_head); + list_move_tail(&b->list_head, &dev->outbufs_list); + + spin_unlock_bh(&dev->list_lock); + dprintkrw("output DQBUF index: %d\n", b->buffer.index); + unset_flags(b); + *buf = b->buffer; + buf->type = V4L2_BUF_TYPE_VIDEO_OUTPUT; + dprintkrw( + "dqbuf(OUTPUT)#%d: buffer#%d @ %p type=%d bytesused=%d length=%d flags=%x field=%d timestamp=%lld.%06ld sequence=%d\n", + index, buf->index, buf, buf->type, buf->bytesused, + buf->length, buf->flags, buf->field, + (long long)buf->timestamp.tv_sec, + (long int)buf->timestamp.tv_usec, buf->sequence); + return 0; + default: + return -EINVAL; + } +} + +/* ------------- STREAMING ------------------- */ + +/* start streaming + * called on VIDIOC_STREAMON + */ +static int vidioc_streamon(struct file *file, void *fh, enum v4l2_buf_type type) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + MARK(); + + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(fh); + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + if (!dev->ready_for_capture) { + int ret = allocate_buffers(dev); + if (ret < 0) + return ret; + } + opener->type = WRITER; + dev->ready_for_output = 0; + dev->ready_for_capture++; + return 0; + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + if (!dev->ready_for_capture) + return -EIO; + if (dev->active_readers > 0) + return -EBUSY; + opener->type = READER; + dev->active_readers++; + client_usage_queue_event(dev->vdev); + return 0; + default: + return -EINVAL; + } + return -EINVAL; +} + +/* stop streaming + * called on VIDIOC_STREAMOFF + */ +static int vidioc_streamoff(struct file *file, void *fh, + enum v4l2_buf_type type) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + + MARK(); + dprintk("%d\n", type); + + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(fh); + switch (type) { + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + if (dev->ready_for_capture > 0) + dev->ready_for_capture--; + return 0; + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + if (opener->type == READER) { + opener->type = 0; + dev->active_readers--; + client_usage_queue_event(dev->vdev); + } + return 0; + default: + return -EINVAL; + } + return -EINVAL; +} + +#ifdef CONFIG_VIDEO_V4L1_COMPAT +static int vidiocgmbuf(struct file *file, void *fh, struct video_mbuf *p) +{ + struct v4l2_loopback_device *dev; + MARK(); + + dev = v4l2loopback_getdevice(file); + p->frames = dev->buffers_number; + p->offsets[0] = 0; + p->offsets[1] = 0; + p->size = dev->buffer_size; + return 0; +} +#endif + +static void client_usage_queue_event(struct video_device *vdev) +{ + struct v4l2_event ev; + struct v4l2_loopback_device *dev; + + dev = container_of(vdev->v4l2_dev, struct v4l2_loopback_device, + v4l2_dev); + + memset(&ev, 0, sizeof(ev)); + ev.type = V4L2_EVENT_PRI_CLIENT_USAGE; + ((struct v4l2_event_client_usage *)&ev.u)->count = dev->active_readers; + + v4l2_event_queue(vdev, &ev); +} + +static int client_usage_ops_add(struct v4l2_subscribed_event *sev, + unsigned elems) +{ + if (!(sev->flags & V4L2_EVENT_SUB_FL_SEND_INITIAL)) + return 0; + + client_usage_queue_event(sev->fh->vdev); + return 0; +} + +static void client_usage_ops_replace(struct v4l2_event *old, + const struct v4l2_event *new) +{ + *((struct v4l2_event_client_usage *)&old->u) = + *((struct v4l2_event_client_usage *)&new->u); +} + +static void client_usage_ops_merge(const struct v4l2_event *old, + struct v4l2_event *new) +{ + *((struct v4l2_event_client_usage *)&new->u) = + *((struct v4l2_event_client_usage *)&old->u); 
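+	/*
+	 * Editor's sketch (hypothetical userspace snippet, not part of the
+	 * driver): a producer can watch the reader count exposed through this
+	 * private event roughly like so:
+	 *
+	 *	struct v4l2_event_subscription sub = {
+	 *		.type = V4L2_EVENT_PRI_CLIENT_USAGE,
+	 *		.flags = V4L2_EVENT_SUB_FL_SEND_INITIAL,
+	 *	};
+	 *	struct v4l2_event ev;
+	 *
+	 *	ioctl(fd, VIDIOC_SUBSCRIBE_EVENT, &sub);
+	 *	ioctl(fd, VIDIOC_DQEVENT, &ev);
+	 *	// ((struct v4l2_event_client_usage *)&ev.u)->count active readers
+	 */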
+} + +const struct v4l2_subscribed_event_ops client_usage_ops = { + .add = client_usage_ops_add, + .replace = client_usage_ops_replace, + .merge = client_usage_ops_merge, +}; + +static int vidioc_subscribe_event(struct v4l2_fh *fh, + const struct v4l2_event_subscription *sub) +{ + switch (sub->type) { + case V4L2_EVENT_CTRL: + return v4l2_ctrl_subscribe_event(fh, sub); + case V4L2_EVENT_PRI_CLIENT_USAGE: + return v4l2_event_subscribe(fh, sub, 0, &client_usage_ops); + } + + return -EINVAL; +} + +/* file operations */ +static void vm_open(struct vm_area_struct *vma) +{ + struct v4l2l_buffer *buf; + MARK(); + + buf = vma->vm_private_data; + buf->use_count++; + + buf->buffer.flags |= V4L2_BUF_FLAG_MAPPED; +} + +static void vm_close(struct vm_area_struct *vma) +{ + struct v4l2l_buffer *buf; + MARK(); + + buf = vma->vm_private_data; + buf->use_count--; + + if (buf->use_count <= 0) + buf->buffer.flags &= ~V4L2_BUF_FLAG_MAPPED; +} + +static struct vm_operations_struct vm_ops = { + .open = vm_open, + .close = vm_close, +}; + +static int v4l2_loopback_mmap(struct file *file, struct vm_area_struct *vma) +{ + u8 *addr; + unsigned long start; + unsigned long size; + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + struct v4l2l_buffer *buffer = NULL; + MARK(); + + start = (unsigned long)vma->vm_start; + size = (unsigned long)(vma->vm_end - vma->vm_start); + + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(file->private_data); + + if (size > dev->buffer_size) { + dprintk("userspace tries to mmap too much, fail\n"); + return -EINVAL; + } + if (opener->timeout_image_io) { + /* we are going to map the timeout_image_buffer */ + if ((vma->vm_pgoff << PAGE_SHIFT) != + dev->buffer_size * MAX_BUFFERS) { + dprintk("invalid mmap offset for timeout_image_io mode\n"); + return -EINVAL; + } + } else if ((vma->vm_pgoff << PAGE_SHIFT) > + dev->buffer_size * (dev->buffers_number - 1)) { + dprintk("userspace tries to mmap too far, fail\n"); + return -EINVAL; + } + + /* FIXXXXXME: allocation should not happen here! 
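+	 *
+	 * (Editor's note, illustrative: the offset checks above mirror
+	 * init_buffers(), which places buffer i at m.offset = i * buffer_size;
+	 * an application therefore maps buffer i with
+	 *	mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, i * buffer_size)
+	 * while the timeout image lives at offset MAX_BUFFERS * buffer_size
+	 * when timeout_image_io is enabled.)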
*/ + if (NULL == dev->image) + if (allocate_buffers(dev) < 0) + return -EINVAL; + + if (opener->timeout_image_io) { + buffer = &dev->timeout_image_buffer; + addr = dev->timeout_image; + } else { + int i; + for (i = 0; i < dev->buffers_number; ++i) { + buffer = &dev->buffers[i]; + if ((buffer->buffer.m.offset >> PAGE_SHIFT) == + vma->vm_pgoff) + break; + } + + if (i >= dev->buffers_number) + return -EINVAL; + + addr = dev->image + (vma->vm_pgoff << PAGE_SHIFT); + } + + while (size > 0) { + struct page *page; + + page = vmalloc_to_page(addr); + + if (vm_insert_page(vma, start, page) < 0) + return -EAGAIN; + + start += PAGE_SIZE; + addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + + vma->vm_ops = &vm_ops; + vma->vm_private_data = buffer; + + vm_open(vma); + + MARK(); + return 0; +} + +static unsigned int v4l2_loopback_poll(struct file *file, + struct poll_table_struct *pts) +{ + struct v4l2_loopback_opener *opener; + struct v4l2_loopback_device *dev; + __poll_t req_events = poll_requested_events(pts); + int ret_mask = 0; + MARK(); + + opener = fh_to_opener(file->private_data); + dev = v4l2loopback_getdevice(file); + + if (req_events & POLLPRI) { + if (!v4l2_event_pending(&opener->fh)) + poll_wait(file, &opener->fh.wait, pts); + if (v4l2_event_pending(&opener->fh)) { + ret_mask |= POLLPRI; + if (!(req_events & DEFAULT_POLLMASK)) + return ret_mask; + } + } + + switch (opener->type) { + case WRITER: + ret_mask |= POLLOUT | POLLWRNORM; + break; + case READER: + if (!can_read(dev, opener)) { + if (ret_mask) + return ret_mask; + poll_wait(file, &dev->read_event, pts); + } + if (can_read(dev, opener)) + ret_mask |= POLLIN | POLLRDNORM; + if (v4l2_event_pending(&opener->fh)) + ret_mask |= POLLPRI; + break; + default: + break; + } + + MARK(); + return ret_mask; +} + +/* do not want to limit device opens, it can be as many readers as user want, + * writers are limited by means of setting writer field */ +static int v4l2_loopback_open(struct file *file) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + MARK(); + dev = v4l2loopback_getdevice(file); + if (dev->open_count.counter >= dev->max_openers) + return -EBUSY; + /* kfree on close */ + opener = kzalloc(sizeof(*opener), GFP_KERNEL); + if (opener == NULL) + return -ENOMEM; + + atomic_inc(&dev->open_count); + + opener->timeout_image_io = dev->timeout_image_io; + if (opener->timeout_image_io) { + int r = allocate_timeout_image(dev); + + if (r < 0) { + dprintk("timeout image allocation failed\n"); + + atomic_dec(&dev->open_count); + + kfree(opener); + return r; + } + } + + v4l2_fh_init(&opener->fh, video_devdata(file)); + file->private_data = &opener->fh; + + v4l2_fh_add(&opener->fh); + dprintk("opened dev:%p with image:%p\n", dev, dev ? 
dev->image : NULL); + MARK(); + return 0; +} + +static int v4l2_loopback_close(struct file *file) +{ + struct v4l2_loopback_opener *opener; + struct v4l2_loopback_device *dev; + int is_writer = 0, is_reader = 0; + MARK(); + + opener = fh_to_opener(file->private_data); + dev = v4l2loopback_getdevice(file); + + if (WRITER == opener->type) + is_writer = 1; + if (READER == opener->type) + is_reader = 1; + + atomic_dec(&dev->open_count); + if (dev->open_count.counter == 0) { + del_timer_sync(&dev->sustain_timer); + del_timer_sync(&dev->timeout_timer); + } + try_free_buffers(dev); + + v4l2_fh_del(&opener->fh); + v4l2_fh_exit(&opener->fh); + + kfree(opener); + if (is_writer) + dev->ready_for_output = 1; + if (is_reader) { + dev->active_readers--; + client_usage_queue_event(dev->vdev); + } + MARK(); + return 0; +} + +static ssize_t v4l2_loopback_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int read_index; + struct v4l2_loopback_device *dev; + struct v4l2_buffer *b; + MARK(); + + dev = v4l2loopback_getdevice(file); + + read_index = get_capture_buffer(file); + if (read_index < 0) + return read_index; + if (count > dev->buffer_size) + count = dev->buffer_size; + b = &dev->buffers[read_index].buffer; + if (count > b->bytesused) + count = b->bytesused; + if (copy_to_user((void *)buf, (void *)(dev->image + b->m.offset), + count)) { + printk(KERN_ERR + "v4l2-loopback: failed copy_to_user() in read buf\n"); + return -EFAULT; + } + dprintkrw("leave v4l2_loopback_read()\n"); + return count; +} + +static ssize_t v4l2_loopback_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct v4l2_loopback_opener *opener; + struct v4l2_loopback_device *dev; + int write_index; + struct v4l2_buffer *b; + int err = 0; + + MARK(); + + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(file->private_data); + + if (UNNEGOTIATED == opener->type) { + spin_lock(&dev->lock); + + if (dev->ready_for_output) { + err = vidioc_streamon(file, file->private_data, + V4L2_BUF_TYPE_VIDEO_OUTPUT); + } + + spin_unlock(&dev->lock); + + if (err < 0) + return err; + } + + if (WRITER != opener->type) + return -EINVAL; + + if (!dev->ready_for_capture) { + int ret = allocate_buffers(dev); + if (ret < 0) + return ret; + dev->ready_for_capture = 1; + } + dprintkrw("v4l2_loopback_write() trying to write %zu bytes\n", count); + if (count > dev->buffer_size) + count = dev->buffer_size; + + write_index = v4l2l_mod64(dev->write_position, dev->used_buffers); + b = &dev->buffers[write_index].buffer; + + if (copy_from_user((void *)(dev->image + b->m.offset), (void *)buf, + count)) { + printk(KERN_ERR + "v4l2-loopback: failed copy_from_user() in write buf, could not write %zu\n", + count); + return -EFAULT; + } + v4l2l_get_timestamp(b); + b->bytesused = count; + b->sequence = dev->write_position; + buffer_written(dev, &dev->buffers[write_index]); + wake_up_all(&dev->read_event); + dprintkrw("leave v4l2_loopback_write()\n"); + return count; +} + +/* init functions */ +/* frees buffers, if already allocated */ +static void free_buffers(struct v4l2_loopback_device *dev) +{ + MARK(); + dprintk("freeing image@%p for dev:%p\n", dev ? 
dev->image : NULL, dev); + if (!dev) + return; + if (dev->image) { + vfree(dev->image); + dev->image = NULL; + } + if (dev->timeout_image) { + vfree(dev->timeout_image); + dev->timeout_image = NULL; + } + dev->imagesize = 0; +} +/* frees buffers, if they are no longer needed */ +static void try_free_buffers(struct v4l2_loopback_device *dev) +{ + MARK(); + if (0 == dev->open_count.counter && !dev->keep_format) { + free_buffers(dev); + dev->ready_for_capture = 0; + dev->buffer_size = 0; + dev->write_position = 0; + } +} +/* allocates buffers, if buffer_size is set */ +static int allocate_buffers(struct v4l2_loopback_device *dev) +{ + int err; + + MARK(); + /* vfree on close file operation in case no open handles left */ + + if (dev->buffer_size < 1 || dev->buffers_number < 1) + return -EINVAL; + + if ((__LONG_MAX__ / dev->buffer_size) < dev->buffers_number) + return -ENOSPC; + + if (dev->image) { + dprintk("allocating buffers again: %ld %ld\n", + dev->buffer_size * dev->buffers_number, dev->imagesize); + /* FIXME: prevent double allocation more intelligently! */ + if (dev->buffer_size * dev->buffers_number == dev->imagesize) + return 0; + + /* check whether the total number of readers/writers is <=1 */ + if ((dev->ready_for_capture + dev->active_readers) <= 1) + free_buffers(dev); + else + return -EINVAL; + } + + dev->imagesize = (unsigned long)dev->buffer_size * + (unsigned long)dev->buffers_number; + + dprintk("allocating %ld = %ldx%d\n", dev->imagesize, dev->buffer_size, + dev->buffers_number); + err = -ENOMEM; + + if (dev->timeout_jiffies > 0) { + err = allocate_timeout_image(dev); + if (err < 0) + goto error; + } + + dev->image = vmalloc(dev->imagesize); + if (dev->image == NULL) + goto error; + + dprintk("vmallocated %ld bytes\n", dev->imagesize); + MARK(); + + init_buffers(dev); + return 0; + +error: + free_buffers(dev); + return err; +} + +/* init inner buffers, they are capture mode and flags are set as + * for capture mod buffers */ +static void init_buffers(struct v4l2_loopback_device *dev) +{ + int i; + int buffer_size; + int bytesused; + MARK(); + + buffer_size = dev->buffer_size; + bytesused = dev->pix_format.sizeimage; + for (i = 0; i < dev->buffers_number; ++i) { + struct v4l2_buffer *b = &dev->buffers[i].buffer; + b->index = i; + b->bytesused = bytesused; + b->length = buffer_size; + b->field = V4L2_FIELD_NONE; + b->flags = 0; + b->m.offset = i * buffer_size; + b->memory = V4L2_MEMORY_MMAP; + b->sequence = 0; + b->timestamp.tv_sec = 0; + b->timestamp.tv_usec = 0; + b->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + + v4l2l_get_timestamp(b); + } + dev->timeout_image_buffer = dev->buffers[0]; + dev->timeout_image_buffer.buffer.m.offset = MAX_BUFFERS * buffer_size; + MARK(); +} + +static int allocate_timeout_image(struct v4l2_loopback_device *dev) +{ + MARK(); + if (dev->buffer_size <= 0) { + dev->timeout_image_io = 0; + return -EINVAL; + } + + if (dev->timeout_image == NULL) { + dev->timeout_image = vzalloc(dev->buffer_size); + if (dev->timeout_image == NULL) { + dev->timeout_image_io = 0; + return -ENOMEM; + } + } + return 0; +} + +/* fills and register video device */ +static void init_vdev(struct video_device *vdev, int nr) +{ + MARK(); + +#ifdef V4L2LOOPBACK_WITH_STD + vdev->tvnorms = V4L2_STD_ALL; +#endif /* V4L2LOOPBACK_WITH_STD */ + + vdev->vfl_type = VFL_TYPE_VIDEO; + vdev->fops = &v4l2_loopback_fops; + vdev->ioctl_ops = &v4l2_loopback_ioctl_ops; + vdev->release = &video_device_release; + vdev->minor = -1; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) + vdev->device_caps = 
V4L2_CAP_DEVICE_CAPS | V4L2_CAP_VIDEO_CAPTURE | + V4L2_CAP_VIDEO_OUTPUT | V4L2_CAP_READWRITE | + V4L2_CAP_STREAMING; +#endif + + if (debug > 1) + vdev->dev_debug = V4L2_DEV_DEBUG_IOCTL | + V4L2_DEV_DEBUG_IOCTL_ARG; + + vdev->vfl_dir = VFL_DIR_M2M; + + MARK(); +} + +/* init default capture parameters, only fps may be changed in future */ +static void init_capture_param(struct v4l2_captureparm *capture_param) +{ + MARK(); + capture_param->capability = 0; + capture_param->capturemode = 0; + capture_param->extendedmode = 0; + capture_param->readbuffers = max_buffers; + capture_param->timeperframe.numerator = 1; + capture_param->timeperframe.denominator = 30; +} + +static void check_timers(struct v4l2_loopback_device *dev) +{ + if (!dev->ready_for_capture) + return; + + if (dev->timeout_jiffies > 0 && !timer_pending(&dev->timeout_timer)) + mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies); + if (dev->sustain_framerate && !timer_pending(&dev->sustain_timer)) + mod_timer(&dev->sustain_timer, + jiffies + dev->frame_jiffies * 3 / 2); +} +#ifdef HAVE_TIMER_SETUP +static void sustain_timer_clb(struct timer_list *t) +{ + struct v4l2_loopback_device *dev = from_timer(dev, t, sustain_timer); +#else +static void sustain_timer_clb(unsigned long nr) +{ + struct v4l2_loopback_device *dev = + idr_find(&v4l2loopback_index_idr, nr); +#endif + spin_lock(&dev->lock); + if (dev->sustain_framerate) { + dev->reread_count++; + dprintkrw("reread: %lld %d\n", (long long)dev->write_position, + dev->reread_count); + if (dev->reread_count == 1) + mod_timer(&dev->sustain_timer, + jiffies + max(1UL, dev->frame_jiffies / 2)); + else + mod_timer(&dev->sustain_timer, + jiffies + dev->frame_jiffies); + wake_up_all(&dev->read_event); + } + spin_unlock(&dev->lock); +} +#ifdef HAVE_TIMER_SETUP +static void timeout_timer_clb(struct timer_list *t) +{ + struct v4l2_loopback_device *dev = from_timer(dev, t, timeout_timer); +#else +static void timeout_timer_clb(unsigned long nr) +{ + struct v4l2_loopback_device *dev = + idr_find(&v4l2loopback_index_idr, nr); +#endif + spin_lock(&dev->lock); + if (dev->timeout_jiffies > 0) { + dev->timeout_happened = 1; + mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies); + wake_up_all(&dev->read_event); + } + spin_unlock(&dev->lock); +} + +/* init loopback main structure */ +#define DEFAULT_FROM_CONF(confmember, default_condition, default_value) \ + ((conf) ? \ + ((conf->confmember default_condition) ? (default_value) : \ + (conf->confmember)) : \ + default_value) + +static int v4l2_loopback_add(struct v4l2_loopback_config *conf, int *ret_nr) +{ + struct v4l2_loopback_device *dev; + struct v4l2_ctrl_handler *hdl; + struct v4l2loopback_private *vdev_priv = NULL; + + int err = -ENOMEM; + + u32 _width = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH; + u32 _height = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT; + + u32 _min_width = DEFAULT_FROM_CONF(min_width, + < V4L2LOOPBACK_SIZE_MIN_WIDTH, + V4L2LOOPBACK_SIZE_MIN_WIDTH); + u32 _min_height = DEFAULT_FROM_CONF(min_height, + < V4L2LOOPBACK_SIZE_MIN_HEIGHT, + V4L2LOOPBACK_SIZE_MIN_HEIGHT); + u32 _max_width = DEFAULT_FROM_CONF(max_width, < _min_width, max_width); + u32 _max_height = + DEFAULT_FROM_CONF(max_height, < _min_height, max_height); + bool _announce_all_caps = (conf && conf->announce_all_caps >= 0) ? 
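+	/*
+	 * Editor's note (illustrative, not from the original source): the two
+	 * timer callbacks above implement the CID_SUSTAIN_FRAMERATE and
+	 * CID_TIMEOUT behaviour - sustain_timer_clb() bumps reread_count every
+	 * frame_jiffies so idle readers are woken and served the last frame
+	 * again, while timeout_timer_clb() sets timeout_happened so readers
+	 * receive the timeout_image instead.
+	 */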
+ (conf->announce_all_caps) : + V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS; + int _max_buffers = DEFAULT_FROM_CONF(max_buffers, <= 0, max_buffers); + int _max_openers = DEFAULT_FROM_CONF(max_openers, <= 0, max_openers); + + int nr = -1; + + _announce_all_caps = (!!_announce_all_caps); + + if (conf) { + const int output_nr = conf->output_nr; +#ifdef SPLIT_DEVICES + const int capture_nr = conf->capture_nr; +#else + const int capture_nr = output_nr; +#endif + if (capture_nr >= 0 && output_nr == capture_nr) { + nr = output_nr; + } else if (capture_nr < 0 && output_nr < 0) { + nr = -1; + } else if (capture_nr < 0) { + nr = output_nr; + } else if (output_nr < 0) { + nr = capture_nr; + } else { + printk(KERN_ERR + "split OUTPUT and CAPTURE devices not yet supported."); + printk(KERN_INFO + "both devices must have the same number (%d != %d).", + output_nr, capture_nr); + return -EINVAL; + } + } + + if (idr_find(&v4l2loopback_index_idr, nr)) + return -EEXIST; + + dprintk("creating v4l2loopback-device #%d\n", nr); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + /* allocate id, if @id >= 0, we're requesting that specific id */ + if (nr >= 0) { + err = idr_alloc(&v4l2loopback_index_idr, dev, nr, nr + 1, + GFP_KERNEL); + if (err == -ENOSPC) + err = -EEXIST; + } else { + err = idr_alloc(&v4l2loopback_index_idr, dev, 0, 0, GFP_KERNEL); + } + if (err < 0) + goto out_free_dev; + nr = err; + err = -ENOMEM; + + if (conf && conf->card_label[0]) { + snprintf(dev->card_label, sizeof(dev->card_label), "%s", + conf->card_label); + } else { + snprintf(dev->card_label, sizeof(dev->card_label), + "Dummy video device (0x%04X)", nr); + } + snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), + "v4l2loopback-%03d", nr); + + err = v4l2_device_register(NULL, &dev->v4l2_dev); + if (err) + goto out_free_idr; + MARK(); + + dev->vdev = video_device_alloc(); + if (dev->vdev == NULL) { + err = -ENOMEM; + goto out_unregister; + } + + vdev_priv = kzalloc(sizeof(struct v4l2loopback_private), GFP_KERNEL); + if (vdev_priv == NULL) { + err = -ENOMEM; + goto out_unregister; + } + + video_set_drvdata(dev->vdev, vdev_priv); + if (video_get_drvdata(dev->vdev) == NULL) { + err = -ENOMEM; + goto out_unregister; + } + + MARK(); + snprintf(dev->vdev->name, sizeof(dev->vdev->name), "%s", + dev->card_label); + + vdev_priv->device_nr = nr; + + init_vdev(dev->vdev, nr); + dev->vdev->v4l2_dev = &dev->v4l2_dev; + init_capture_param(&dev->capture_param); + err = set_timeperframe(dev, &dev->capture_param.timeperframe); + if (err) + goto out_unregister; + dev->keep_format = 0; + dev->sustain_framerate = 0; + + dev->announce_all_caps = _announce_all_caps; + dev->min_width = _min_width; + dev->min_height = _min_height; + dev->max_width = _max_width; + dev->max_height = _max_height; + dev->max_openers = _max_openers; + dev->buffers_number = dev->used_buffers = _max_buffers; + + dev->write_position = 0; + + MARK(); + spin_lock_init(&dev->lock); + spin_lock_init(&dev->list_lock); + INIT_LIST_HEAD(&dev->outbufs_list); + if (list_empty(&dev->outbufs_list)) { + int i; + + for (i = 0; i < dev->used_buffers; ++i) + list_add_tail(&dev->buffers[i].list_head, + &dev->outbufs_list); + } + memset(dev->bufpos2index, 0, sizeof(dev->bufpos2index)); + atomic_set(&dev->open_count, 0); + dev->ready_for_capture = 0; + dev->ready_for_output = 1; + + dev->buffer_size = 0; + dev->image = NULL; + dev->imagesize = 0; +#ifdef HAVE_TIMER_SETUP + timer_setup(&dev->sustain_timer, sustain_timer_clb, 0); + timer_setup(&dev->timeout_timer, timeout_timer_clb, 
0); +#else + setup_timer(&dev->sustain_timer, sustain_timer_clb, nr); + setup_timer(&dev->timeout_timer, timeout_timer_clb, nr); +#endif + dev->reread_count = 0; + dev->timeout_jiffies = 0; + dev->timeout_image = NULL; + dev->timeout_happened = 0; + + hdl = &dev->ctrl_handler; + err = v4l2_ctrl_handler_init(hdl, 4); + if (err) + goto out_unregister; + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_keepformat, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_sustainframerate, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeout, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeoutimageio, NULL); + if (hdl->error) { + err = hdl->error; + goto out_free_handler; + } + dev->v4l2_dev.ctrl_handler = hdl; + + err = v4l2_ctrl_handler_setup(hdl); + if (err) + goto out_free_handler; + + /* FIXME set buffers to 0 */ + + /* Set initial format */ + if (_width < _min_width) + _width = _min_width; + if (_width > _max_width) + _width = _max_width; + if (_height < _min_height) + _height = _min_height; + if (_height > _max_height) + _height = _max_height; + + dev->pix_format.width = _width; + dev->pix_format.height = _height; + dev->pix_format.pixelformat = formats[0].fourcc; + dev->pix_format.colorspace = + V4L2_COLORSPACE_DEFAULT; /* do we need to set this ? */ + dev->pix_format.field = V4L2_FIELD_NONE; + + dev->buffer_size = PAGE_ALIGN(dev->pix_format.sizeimage); + dprintk("buffer_size = %ld (=%d)\n", dev->buffer_size, + dev->pix_format.sizeimage); + + if (dev->buffer_size && ((err = allocate_buffers(dev)) < 0)) + goto out_free_handler; + + init_waitqueue_head(&dev->read_event); + + /* register the device -> it creates /dev/video* */ + if (video_register_device(dev->vdev, VFL_TYPE_VIDEO, nr) < 0) { + printk(KERN_ERR + "v4l2loopback: failed video_register_device()\n"); + err = -EFAULT; + goto out_free_device; + } + v4l2loopback_create_sysfs(dev->vdev); + + MARK(); + if (ret_nr) + *ret_nr = dev->vdev->num; + return 0; + +out_free_device: + video_device_release(dev->vdev); +out_free_handler: + v4l2_ctrl_handler_free(&dev->ctrl_handler); +out_unregister: + video_set_drvdata(dev->vdev, NULL); + if (vdev_priv != NULL) + kfree(vdev_priv); + v4l2_device_unregister(&dev->v4l2_dev); +out_free_idr: + idr_remove(&v4l2loopback_index_idr, nr); +out_free_dev: + kfree(dev); + return err; +} + +static void v4l2_loopback_remove(struct v4l2_loopback_device *dev) +{ + free_buffers(dev); + v4l2loopback_remove_sysfs(dev->vdev); + kfree(video_get_drvdata(dev->vdev)); + video_unregister_device(dev->vdev); + v4l2_device_unregister(&dev->v4l2_dev); + v4l2_ctrl_handler_free(&dev->ctrl_handler); + kfree(dev); +} + +static long v4l2loopback_control_ioctl(struct file *file, unsigned int cmd, + unsigned long parm) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_config conf; + struct v4l2_loopback_config *confptr = &conf; + int device_nr, capture_nr, output_nr; + int ret; + + ret = mutex_lock_killable(&v4l2loopback_ctl_mutex); + if (ret) + return ret; + + ret = -EINVAL; + switch (cmd) { + default: + ret = -ENOSYS; + break; + /* add a v4l2loopback device (pair), based on the user-provided specs */ + case V4L2LOOPBACK_CTL_ADD: + if (parm) { + if ((ret = copy_from_user(&conf, (void *)parm, + sizeof(conf))) < 0) + break; + } else + confptr = NULL; + ret = v4l2_loopback_add(confptr, &device_nr); + if (ret >= 0) + ret = device_nr; + break; + /* remove a v4l2loopback device (both capture and output) */ + case V4L2LOOPBACK_CTL_REMOVE: + ret = v4l2loopback_lookup((int)parm, &dev); + if (ret >= 0 && dev) { + int 
nr = ret; + ret = -EBUSY; + if (dev->open_count.counter > 0) + break; + idr_remove(&v4l2loopback_index_idr, nr); + v4l2_loopback_remove(dev); + ret = 0; + }; + break; + /* get information for a loopback device. + * this is mostly about limits (which cannot be queried directly with VIDIOC_G_FMT and friends + */ + case V4L2LOOPBACK_CTL_QUERY: + if (!parm) + break; + if ((ret = copy_from_user(&conf, (void *)parm, sizeof(conf))) < + 0) + break; + capture_nr = output_nr = conf.output_nr; +#ifdef SPLIT_DEVICES + capture_nr = conf.capture_nr; +#endif + device_nr = (output_nr < 0) ? capture_nr : output_nr; + MARK(); + /* get the device from either capture_nr or output_nr (whatever is valid) */ + if ((ret = v4l2loopback_lookup(device_nr, &dev)) < 0) + break; + MARK(); + /* if we got the device from output_nr and there is a valid capture_nr, + * make sure that both refer to the same device (or bail out) + */ + if ((device_nr != capture_nr) && (capture_nr >= 0) && + ((ret = v4l2loopback_lookup(capture_nr, 0)) < 0)) + break; + MARK(); + /* if otoh, we got the device from capture_nr and there is a valid output_nr, + * make sure that both refer to the same device (or bail out) + */ + if ((device_nr != output_nr) && (output_nr >= 0) && + ((ret = v4l2loopback_lookup(output_nr, 0)) < 0)) + break; + MARK(); + + /* v4l2_loopback_config identified a single device, so fetch the data */ + snprintf(conf.card_label, sizeof(conf.card_label), "%s", + dev->card_label); + MARK(); + conf.output_nr = dev->vdev->num; +#ifdef SPLIT_DEVICES + conf.capture_nr = dev->vdev->num; +#endif + conf.min_width = dev->min_width; + conf.min_height = dev->min_height; + conf.max_width = dev->max_width; + conf.max_height = dev->max_height; + conf.announce_all_caps = dev->announce_all_caps; + conf.max_buffers = dev->buffers_number; + conf.max_openers = dev->max_openers; + conf.debug = debug; + MARK(); + if (copy_to_user((void *)parm, &conf, sizeof(conf))) { + ret = -EFAULT; + break; + } + MARK(); + ret = 0; + ; + break; + } + + MARK(); + mutex_unlock(&v4l2loopback_ctl_mutex); + MARK(); + return ret; +} + +/* LINUX KERNEL */ + +static const struct file_operations v4l2loopback_ctl_fops = { + // clang-format off + .owner = THIS_MODULE, + .open = nonseekable_open, + .unlocked_ioctl = v4l2loopback_control_ioctl, + .compat_ioctl = v4l2loopback_control_ioctl, + .llseek = noop_llseek, + // clang-format on +}; + +static struct miscdevice v4l2loopback_misc = { + // clang-format off + .minor = MISC_DYNAMIC_MINOR, + .name = "v4l2loopback", + .fops = &v4l2loopback_ctl_fops, + // clang-format on +}; + +static const struct v4l2_file_operations v4l2_loopback_fops = { + // clang-format off + .owner = THIS_MODULE, + .open = v4l2_loopback_open, + .release = v4l2_loopback_close, + .read = v4l2_loopback_read, + .write = v4l2_loopback_write, + .poll = v4l2_loopback_poll, + .mmap = v4l2_loopback_mmap, + .unlocked_ioctl = video_ioctl2, + // clang-format on +}; + +static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops = { + // clang-format off + .vidioc_querycap = &vidioc_querycap, + .vidioc_enum_framesizes = &vidioc_enum_framesizes, + .vidioc_enum_frameintervals = &vidioc_enum_frameintervals, + + .vidioc_enum_output = &vidioc_enum_output, + .vidioc_g_output = &vidioc_g_output, + .vidioc_s_output = &vidioc_s_output, + + .vidioc_enum_input = &vidioc_enum_input, + .vidioc_g_input = &vidioc_g_input, + .vidioc_s_input = &vidioc_s_input, + + .vidioc_enum_fmt_vid_cap = &vidioc_enum_fmt_cap, + .vidioc_g_fmt_vid_cap = &vidioc_g_fmt_cap, + .vidioc_s_fmt_vid_cap = 
&vidioc_s_fmt_cap, + .vidioc_try_fmt_vid_cap = &vidioc_try_fmt_cap, + + .vidioc_enum_fmt_vid_out = &vidioc_enum_fmt_out, + .vidioc_s_fmt_vid_out = &vidioc_s_fmt_out, + .vidioc_g_fmt_vid_out = &vidioc_g_fmt_out, + .vidioc_try_fmt_vid_out = &vidioc_try_fmt_out, + +#ifdef V4L2L_OVERLAY + .vidioc_s_fmt_vid_overlay = &vidioc_s_fmt_overlay, + .vidioc_g_fmt_vid_overlay = &vidioc_g_fmt_overlay, +#endif + +#ifdef V4L2LOOPBACK_WITH_STD + .vidioc_s_std = &vidioc_s_std, + .vidioc_g_std = &vidioc_g_std, + .vidioc_querystd = &vidioc_querystd, +#endif /* V4L2LOOPBACK_WITH_STD */ + + .vidioc_g_parm = &vidioc_g_parm, + .vidioc_s_parm = &vidioc_s_parm, + + .vidioc_reqbufs = &vidioc_reqbufs, + .vidioc_querybuf = &vidioc_querybuf, + .vidioc_qbuf = &vidioc_qbuf, + .vidioc_dqbuf = &vidioc_dqbuf, + + .vidioc_streamon = &vidioc_streamon, + .vidioc_streamoff = &vidioc_streamoff, + +#ifdef CONFIG_VIDEO_V4L1_COMPAT + .vidiocgmbuf = &vidiocgmbuf, +#endif + + .vidioc_subscribe_event = &vidioc_subscribe_event, + .vidioc_unsubscribe_event = &v4l2_event_unsubscribe, + // clang-format on +}; + +static int free_device_cb(int id, void *ptr, void *data) +{ + struct v4l2_loopback_device *dev = ptr; + v4l2_loopback_remove(dev); + return 0; +} +static void free_devices(void) +{ + idr_for_each(&v4l2loopback_index_idr, &free_device_cb, NULL); + idr_destroy(&v4l2loopback_index_idr); +} + +static int __init v4l2loopback_init_module(void) +{ + const u32 min_width = V4L2LOOPBACK_SIZE_MIN_WIDTH; + const u32 min_height = V4L2LOOPBACK_SIZE_MIN_HEIGHT; + int err; + int i; + MARK(); + + err = misc_register(&v4l2loopback_misc); + if (err < 0) + return err; + + if (devices < 0) { + devices = 1; + + /* try guessing the devices from the "video_nr" parameter */ + for (i = MAX_DEVICES - 1; i >= 0; i--) { + if (video_nr[i] >= 0) { + devices = i + 1; + break; + } + } + } + + if (devices > MAX_DEVICES) { + devices = MAX_DEVICES; + printk(KERN_INFO + "v4l2loopback: number of initial devices is limited to: %d\n", + MAX_DEVICES); + } + + if (max_buffers > MAX_BUFFERS) { + max_buffers = MAX_BUFFERS; + printk(KERN_INFO + "v4l2loopback: number of buffers is limited to: %d\n", + MAX_BUFFERS); + } + + if (max_openers < 0) { + printk(KERN_INFO + "v4l2loopback: allowing %d openers rather than %d\n", + 2, max_openers); + max_openers = 2; + } + + if (max_width < min_width) { + max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH; + printk(KERN_INFO "v4l2loopback: using max_width %d\n", + max_width); + } + if (max_height < min_height) { + max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT; + printk(KERN_INFO "v4l2loopback: using max_height %d\n", + max_height); + } + + for (i = 0; i < devices; i++) { + struct v4l2_loopback_config cfg = { + // clang-format off + .output_nr = video_nr[i], +#ifdef SPLIT_DEVICES + .capture_nr = video_nr[i], +#endif + .min_width = min_width, + .min_height = min_height, + .max_width = max_width, + .max_height = max_height, + .announce_all_caps = (!exclusive_caps[i]), + .max_buffers = max_buffers, + .max_openers = max_openers, + .debug = debug, + // clang-format on + }; + cfg.card_label[0] = 0; + if (card_label[i]) + snprintf(cfg.card_label, sizeof(cfg.card_label), "%s", + card_label[i]); + err = v4l2_loopback_add(&cfg, 0); + if (err) { + free_devices(); + goto error; + } + } + + dprintk("module installed\n"); + + printk(KERN_INFO "v4l2loopback driver version %d.%d.%d%s loaded\n", + // clang-format off + (V4L2LOOPBACK_VERSION_CODE >> 16) & 0xff, + (V4L2LOOPBACK_VERSION_CODE >> 8) & 0xff, + (V4L2LOOPBACK_VERSION_CODE ) & 0xff, +#ifdef 
SNAPSHOT_VERSION + " (" __stringify(SNAPSHOT_VERSION) ")" +#else + "" +#endif + ); + // clang-format on + + return 0; +error: + misc_deregister(&v4l2loopback_misc); + return err; +} + +static void v4l2loopback_cleanup_module(void) +{ + MARK(); + /* unregister the device -> it deletes /dev/video* */ + free_devices(); + /* and get rid of /dev/v4l2loopback */ + misc_deregister(&v4l2loopback_misc); + dprintk("module removed\n"); +} + +MODULE_ALIAS_MISCDEV(MISC_DYNAMIC_MINOR); + +module_init(v4l2loopback_init_module); +module_exit(v4l2loopback_cleanup_module); diff --git a/drivers/media/v4l2-core/v4l2loopback.h b/drivers/media/v4l2-core/v4l2loopback.h new file mode 100644 index 000000000000..1bc7e6b747a4 --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * v4l2loopback.h + * + * Written by IOhannes m zmölnig, 7/1/20. + * + * Copyright 2020 by IOhannes m zmölnig. Redistribution of this file is + * permitted under the GNU General Public License. + */ +#ifndef _V4L2LOOPBACK_H +#define _V4L2LOOPBACK_H + +#define V4L2LOOPBACK_VERSION_MAJOR 0 +#define V4L2LOOPBACK_VERSION_MINOR 13 +#define V4L2LOOPBACK_VERSION_BUGFIX 1 + +/* /dev/v4l2loopback interface */ + +struct v4l2_loopback_config { + /** + * the device-number (/dev/video) + * V4L2LOOPBACK_CTL_ADD: + * setting this to a value<0, will allocate an available one + * if nr>=0 and the device already exists, the ioctl will EEXIST + * if output_nr and capture_nr are the same, only a single device will be created + * NOTE: currently split-devices (where output_nr and capture_nr differ) + * are not implemented yet. + * until then, requesting different device-IDs will result in EINVAL. + * + * V4L2LOOPBACK_CTL_QUERY: + * either both output_nr and capture_nr must refer to the same loopback, + * or one (and only one) of them must be -1 + * + */ + int output_nr; + int unused; /*capture_nr;*/ + + /** + * a nice name for your device + * if (*card_label)==0, an automatic name is assigned + */ + char card_label[32]; + + /** + * allowed frame size + * if too low, default values are used + */ + unsigned int min_width; + unsigned int max_width; + unsigned int min_height; + unsigned int max_height; + + /** + * number of buffers to allocate for the queue + * if set to <=0, default values are used + */ + int max_buffers; + + /** + * how many consumers are allowed to open this device concurrently + * if set to <=0, default values are used + */ + int max_openers; + + /** + * set the debugging level for this device + */ + int debug; + + /** + * whether to announce OUTPUT/CAPTURE capabilities exclusively + * for this device or not + * (!exclusive_caps) + * NOTE: this is going to be removed once separate output/capture + * devices are implemented + */ + int announce_all_caps; +}; + +/* a pointer to a (struct v4l2_loopback_config) that has all values you wish to impose on the + * to-be-created device set. + * if the ptr is NULL, a new device is created with default values at the driver's discretion. 
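+ *
+ * a minimal userspace sketch (illustrative only; error handling and the
+ * needed open()/ioctl() headers omitted):
+ *
+ *   struct v4l2_loopback_config cfg = { .output_nr = -1 };
+ *   int ctl = open("/dev/v4l2loopback", O_RDWR);
+ *   int nr  = ioctl(ctl, V4L2LOOPBACK_CTL_ADD, &cfg);  /* nr >= 0 on success */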
+ * + * returns the device_nr of the OUTPUT device (which can be used with V4L2LOOPBACK_CTL_QUERY, + * to get more information on the device) + */ +#define V4L2LOOPBACK_CTL_ADD 0x4C80 + +/* a pointer to a (struct v4l2_loopback_config) that has output_nr and/or capture_nr set + * (the two values must either refer to video-devices associated with the same loopback device + * or exactly one of them must be <0 + */ +#define V4L2LOOPBACK_CTL_QUERY 0x4C82 + +/* the device-number (either CAPTURE or OUTPUT) associated with the loopback-device */ +#define V4L2LOOPBACK_CTL_REMOVE 0x4C81 + +#endif /* _V4L2LOOPBACK_H */ diff --git a/drivers/media/v4l2-core/v4l2loopback_formats.h b/drivers/media/v4l2-core/v4l2loopback_formats.h new file mode 100644 index 000000000000..d855a3796554 --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback_formats.h @@ -0,0 +1,445 @@ +static const struct v4l2l_format formats[] = { +#ifndef V4L2_PIX_FMT_VP9 +#define V4L2_PIX_FMT_VP9 v4l2_fourcc('V', 'P', '9', '0') +#endif +#ifndef V4L2_PIX_FMT_HEVC +#define V4L2_PIX_FMT_HEVC v4l2_fourcc('H', 'E', 'V', 'C') +#endif + + /* here come the packed formats */ + { + .name = "32 bpp RGB, le", + .fourcc = V4L2_PIX_FMT_BGR32, + .depth = 32, + .flags = 0, + }, + { + .name = "32 bpp RGB, be", + .fourcc = V4L2_PIX_FMT_RGB32, + .depth = 32, + .flags = 0, + }, + { + .name = "24 bpp RGB, le", + .fourcc = V4L2_PIX_FMT_BGR24, + .depth = 24, + .flags = 0, + }, + { + .name = "24 bpp RGB, be", + .fourcc = V4L2_PIX_FMT_RGB24, + .depth = 24, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_ABGR32 + { + .name = "32 bpp RGBA, le", + .fourcc = V4L2_PIX_FMT_ABGR32, + .depth = 32, + .flags = 0, + }, +#endif +#ifdef V4L2_PIX_FMT_RGBA32 + { + .name = "32 bpp RGBA", + .fourcc = V4L2_PIX_FMT_RGBA32, + .depth = 32, + .flags = 0, + }, +#endif +#ifdef V4L2_PIX_FMT_RGB332 + { + .name = "8 bpp RGB-3-3-2", + .fourcc = V4L2_PIX_FMT_RGB332, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB332 */ +#ifdef V4L2_PIX_FMT_RGB444 + { + .name = "16 bpp RGB (xxxxrrrr ggggbbbb)", + .fourcc = V4L2_PIX_FMT_RGB444, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB444 */ +#ifdef V4L2_PIX_FMT_RGB555 + { + .name = "16 bpp RGB-5-5-5", + .fourcc = V4L2_PIX_FMT_RGB555, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB555 */ +#ifdef V4L2_PIX_FMT_RGB565 + { + .name = "16 bpp RGB-5-6-5", + .fourcc = V4L2_PIX_FMT_RGB565, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB565 */ +#ifdef V4L2_PIX_FMT_RGB555X + { + .name = "16 bpp RGB-5-5-5 BE", + .fourcc = V4L2_PIX_FMT_RGB555X, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB555X */ +#ifdef V4L2_PIX_FMT_RGB565X + { + .name = "16 bpp RGB-5-6-5 BE", + .fourcc = V4L2_PIX_FMT_RGB565X, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB565X */ +#ifdef V4L2_PIX_FMT_BGR666 + { + .name = "18 bpp BGR-6-6-6", + .fourcc = V4L2_PIX_FMT_BGR666, + .depth = 18, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_BGR666 */ + { + .name = "4:2:2, packed, YUYV", + .fourcc = V4L2_PIX_FMT_YUYV, + .depth = 16, + .flags = 0, + }, + { + .name = "4:2:2, packed, UYVY", + .fourcc = V4L2_PIX_FMT_UYVY, + .depth = 16, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_YVYU + { + .name = "4:2:2, packed YVYU", + .fourcc = V4L2_PIX_FMT_YVYU, + .depth = 16, + .flags = 0, + }, +#endif +#ifdef V4L2_PIX_FMT_VYUY + { + .name = "4:2:2, packed VYUY", + .fourcc = V4L2_PIX_FMT_VYUY, + .depth = 16, + .flags = 0, + }, +#endif + { + .name = "4:2:2, packed YYUV", + .fourcc = V4L2_PIX_FMT_YYUV, + .depth = 16, + .flags = 0, + }, + { + .name = 
"YUV-8-8-8-8", + .fourcc = V4L2_PIX_FMT_YUV32, + .depth = 32, + .flags = 0, + }, + { + .name = "8 bpp, Greyscale", + .fourcc = V4L2_PIX_FMT_GREY, + .depth = 8, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_Y4 + { + .name = "4 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y4, + .depth = 4, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y4 */ +#ifdef V4L2_PIX_FMT_Y6 + { + .name = "6 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y6, + .depth = 6, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y6 */ +#ifdef V4L2_PIX_FMT_Y10 + { + .name = "10 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y10, + .depth = 10, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y10 */ +#ifdef V4L2_PIX_FMT_Y12 + { + .name = "12 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y12, + .depth = 12, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y12 */ + { + .name = "16 bpp, Greyscale", + .fourcc = V4L2_PIX_FMT_Y16, + .depth = 16, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_YUV444 + { + .name = "16 bpp xxxxyyyy uuuuvvvv", + .fourcc = V4L2_PIX_FMT_YUV444, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV444 */ +#ifdef V4L2_PIX_FMT_YUV555 + { + .name = "16 bpp YUV-5-5-5", + .fourcc = V4L2_PIX_FMT_YUV555, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV555 */ +#ifdef V4L2_PIX_FMT_YUV565 + { + .name = "16 bpp YUV-5-6-5", + .fourcc = V4L2_PIX_FMT_YUV565, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV565 */ + +/* bayer formats */ +#ifdef V4L2_PIX_FMT_SRGGB8 + { + .name = "Bayer RGGB 8bit", + .fourcc = V4L2_PIX_FMT_SRGGB8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SRGGB8 */ +#ifdef V4L2_PIX_FMT_SGRBG8 + { + .name = "Bayer GRBG 8bit", + .fourcc = V4L2_PIX_FMT_SGRBG8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SGRBG8 */ +#ifdef V4L2_PIX_FMT_SGBRG8 + { + .name = "Bayer GBRG 8bit", + .fourcc = V4L2_PIX_FMT_SGBRG8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SGBRG8 */ +#ifdef V4L2_PIX_FMT_SBGGR8 + { + .name = "Bayer BA81 8bit", + .fourcc = V4L2_PIX_FMT_SBGGR8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SBGGR8 */ + + /* here come the planar formats */ + { + .name = "4:1:0, planar, Y-Cr-Cb", + .fourcc = V4L2_PIX_FMT_YVU410, + .depth = 9, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:2:0, planar, Y-Cr-Cb", + .fourcc = V4L2_PIX_FMT_YVU420, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:1:0, planar, Y-Cb-Cr", + .fourcc = V4L2_PIX_FMT_YUV410, + .depth = 9, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:2:0, planar, Y-Cb-Cr", + .fourcc = V4L2_PIX_FMT_YUV420, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#ifdef V4L2_PIX_FMT_YUV422P + { + .name = "16 bpp YVU422 planar", + .fourcc = V4L2_PIX_FMT_YUV422P, + .depth = 16, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_YUV422P */ +#ifdef V4L2_PIX_FMT_YUV411P + { + .name = "16 bpp YVU411 planar", + .fourcc = V4L2_PIX_FMT_YUV411P, + .depth = 16, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_YUV411P */ +#ifdef V4L2_PIX_FMT_Y41P + { + .name = "12 bpp YUV 4:1:1", + .fourcc = V4L2_PIX_FMT_Y41P, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_Y41P */ +#ifdef V4L2_PIX_FMT_NV12 + { + .name = "12 bpp Y/CbCr 4:2:0 ", + .fourcc = V4L2_PIX_FMT_NV12, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_NV12 */ + +/* here come the compressed formats */ + +#ifdef V4L2_PIX_FMT_MJPEG + { + .name = "Motion-JPEG", + .fourcc = V4L2_PIX_FMT_MJPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MJPEG */ +#ifdef 
V4L2_PIX_FMT_JPEG + { + .name = "JFIF JPEG", + .fourcc = V4L2_PIX_FMT_JPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_JPEG */ +#ifdef V4L2_PIX_FMT_DV + { + .name = "DV1394", + .fourcc = V4L2_PIX_FMT_DV, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_DV */ +#ifdef V4L2_PIX_FMT_MPEG + { + .name = "MPEG-1/2/4 Multiplexed", + .fourcc = V4L2_PIX_FMT_MPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG */ +#ifdef V4L2_PIX_FMT_H264 + { + .name = "H264 with start codes", + .fourcc = V4L2_PIX_FMT_H264, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264 */ +#ifdef V4L2_PIX_FMT_H264_NO_SC + { + .name = "H264 without start codes", + .fourcc = V4L2_PIX_FMT_H264_NO_SC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264_NO_SC */ +#ifdef V4L2_PIX_FMT_H264_MVC + { + .name = "H264 MVC", + .fourcc = V4L2_PIX_FMT_H264_MVC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264_MVC */ +#ifdef V4L2_PIX_FMT_H263 + { + .name = "H263", + .fourcc = V4L2_PIX_FMT_H263, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H263 */ +#ifdef V4L2_PIX_FMT_MPEG1 + { + .name = "MPEG-1 ES", + .fourcc = V4L2_PIX_FMT_MPEG1, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG1 */ +#ifdef V4L2_PIX_FMT_MPEG2 + { + .name = "MPEG-2 ES", + .fourcc = V4L2_PIX_FMT_MPEG2, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG2 */ +#ifdef V4L2_PIX_FMT_MPEG4 + { + .name = "MPEG-4 part 2 ES", + .fourcc = V4L2_PIX_FMT_MPEG4, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG4 */ +#ifdef V4L2_PIX_FMT_XVID + { + .name = "Xvid", + .fourcc = V4L2_PIX_FMT_XVID, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_XVID */ +#ifdef V4L2_PIX_FMT_VC1_ANNEX_G + { + .name = "SMPTE 421M Annex G compliant stream", + .fourcc = V4L2_PIX_FMT_VC1_ANNEX_G, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VC1_ANNEX_G */ +#ifdef V4L2_PIX_FMT_VC1_ANNEX_L + { + .name = "SMPTE 421M Annex L compliant stream", + .fourcc = V4L2_PIX_FMT_VC1_ANNEX_L, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VC1_ANNEX_L */ +#ifdef V4L2_PIX_FMT_VP8 + { + .name = "VP8", + .fourcc = V4L2_PIX_FMT_VP8, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VP8 */ +#ifdef V4L2_PIX_FMT_VP9 + { + .name = "VP9", + .fourcc = V4L2_PIX_FMT_VP9, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VP9 */ +#ifdef V4L2_PIX_FMT_HEVC + { + .name = "HEVC", + .fourcc = V4L2_PIX_FMT_HEVC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_HEVC */ +}; diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 4b023ee229cf..0fa6921e5a9e 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -2334,5 +2334,16 @@ config MFD_RSMU_SPI Additional drivers must be enabled in order to use the functionality of the device. +config MFD_STEAMDECK + tristate "Valve Steam Deck" + select MFD_CORE + depends on ACPI + depends on X86_64 || COMPILE_TEST + help + This driver registers various MFD cells that expose aspects + of Steam Deck specific ACPI functionality. + + Say N here, unless you are running on Steam Deck hardware. 
+ endmenu endif diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index c66f07edcd0e..2bbdf4127232 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -284,3 +284,5 @@ rsmu-i2c-objs := rsmu_core.o rsmu_i2c.o rsmu-spi-objs := rsmu_core.o rsmu_spi.o obj-$(CONFIG_MFD_RSMU_I2C) += rsmu-i2c.o obj-$(CONFIG_MFD_RSMU_SPI) += rsmu-spi.o + +obj-$(CONFIG_MFD_STEAMDECK) += steamdeck.o diff --git a/drivers/mfd/steamdeck.c b/drivers/mfd/steamdeck.c new file mode 100644 index 000000000000..a60fa7db9141 --- /dev/null +++ b/drivers/mfd/steamdeck.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Steam Deck EC MFD core driver + * + * Copyright (C) 2021-2022 Valve Corporation + * + */ + +#include +#include +#include + +#define STEAMDECK_STA_OK \ + (ACPI_STA_DEVICE_ENABLED | \ + ACPI_STA_DEVICE_PRESENT | \ + ACPI_STA_DEVICE_FUNCTIONING) + +struct steamdeck { + struct acpi_device *adev; + struct device *dev; +}; + +#define STEAMDECK_ATTR_RO(_name, _method) \ + static ssize_t _name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ + { \ + struct steamdeck *sd = dev_get_drvdata(dev); \ + unsigned long long val; \ + \ + if (ACPI_FAILURE(acpi_evaluate_integer( \ + sd->adev->handle, \ + _method, NULL, &val))) \ + return -EIO; \ + \ + return sysfs_emit(buf, "%llu\n", val); \ + } \ + static DEVICE_ATTR_RO(_name) + +STEAMDECK_ATTR_RO(firmware_version, "PDFW"); +STEAMDECK_ATTR_RO(board_id, "BOID"); + +static ssize_t controller_board_power_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct steamdeck *sd = dev_get_drvdata(dev); + bool enabled; + ssize_t ret = kstrtobool(buf, &enabled); + + if (ret) + return ret; + + if (ACPI_FAILURE(acpi_execute_simple_method(sd->adev->handle, + "SCBP", enabled))) + return -EIO; + + return count; +} +static DEVICE_ATTR_WO(controller_board_power); + +static struct attribute *steamdeck_attrs[] = { + &dev_attr_firmware_version.attr, + &dev_attr_board_id.attr, + &dev_attr_controller_board_power.attr, + NULL +}; + +ATTRIBUTE_GROUPS(steamdeck); + +static const struct mfd_cell steamdeck_cells[] = { + { .name = "steamdeck-hwmon" }, + { .name = "steamdeck-leds" }, + { .name = "steamdeck-extcon" }, +}; + +static void steamdeck_remove_sysfs_groups(void *data) +{ + struct steamdeck *sd = data; + + sysfs_remove_groups(&sd->dev->kobj, steamdeck_groups); +} + +static int steamdeck_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + unsigned long long sta; + struct steamdeck *sd; + acpi_status status; + int ret; + + sd = devm_kzalloc(dev, sizeof(*sd), GFP_KERNEL); + if (!sd) + return -ENOMEM; + sd->adev = ACPI_COMPANION(dev); + sd->dev = dev; + platform_set_drvdata(pdev, sd); + + status = acpi_evaluate_integer(sd->adev->handle, "_STA", + NULL, &sta); + if (ACPI_FAILURE(status)) { + dev_err(dev, "Status check failed (0x%x)\n", status); + return -EINVAL; + } + + if ((sta & STEAMDECK_STA_OK) != STEAMDECK_STA_OK) { + dev_err(dev, "Device is not ready\n"); + return -EINVAL; + } + + ret = sysfs_create_groups(&dev->kobj, steamdeck_groups); + if (ret) { + dev_err(dev, "Failed to create sysfs group\n"); + return ret; + } + + ret = devm_add_action_or_reset(dev, steamdeck_remove_sysfs_groups, + sd); + if (ret) { + dev_err(dev, "Failed to register devres action\n"); + return ret; + } + + return devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE, + steamdeck_cells, ARRAY_SIZE(steamdeck_cells), + NULL, 0, NULL); +} + +static const struct acpi_device_id steamdeck_device_ids[] = { + { 
"VLV0100", 0 }, + { "", 0 }, +}; +MODULE_DEVICE_TABLE(acpi, steamdeck_device_ids); + +static struct platform_driver steamdeck_driver = { + .probe = steamdeck_probe, + .driver = { + .name = "steamdeck", + .acpi_match_table = steamdeck_device_ids, + }, +}; +module_platform_driver(steamdeck_driver); + +MODULE_AUTHOR("Andrey Smirnov "); +MODULE_DESCRIPTION("Steam Deck EC MFD core driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index 0c6ecbb9a066..203e33c2233b 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -17,6 +17,7 @@ #include "hif.h" #include "wow.h" #include "fw.h" +#include "wmi.h" unsigned int ath11k_debug_mask; EXPORT_SYMBOL(ath11k_debug_mask); @@ -122,6 +123,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .tcl_ring_retry = true, .tx_ring_size = DP_TCL_DATA_RING_SIZE, .smp2p_wow_exit = false, + .coex_isolation = false, }, { .hw_rev = ATH11K_HW_IPQ6018_HW10, @@ -205,6 +207,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .tx_ring_size = DP_TCL_DATA_RING_SIZE, .smp2p_wow_exit = false, .support_fw_mac_sequence = false, + .coex_isolation = false, }, { .name = "qca6390 hw2.0", @@ -372,6 +375,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .tx_ring_size = DP_TCL_DATA_RING_SIZE, .smp2p_wow_exit = false, .support_fw_mac_sequence = false, + .coex_isolation = false, }, { .name = "wcn6855 hw2.0", @@ -411,6 +415,68 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .fragment_160mhz = false, }, + .interface_modes = BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_AP), + .supports_monitor = false, + .supports_shadow_regs = true, + .idle_ps = true, + .supports_sta_ps = true, + .coldboot_cal_mm = false, + .coldboot_cal_ftm = false, + .fw_mem_mode = 0, + .num_vdevs = 16 + 1, + .num_peers = 512, + .supports_suspend = true, + .hal_desc_sz = sizeof(struct hal_rx_desc_wcn6855), + .supports_regdb = true, + .fix_l1ss = false, + .credit_flow = true, + .max_tx_ring = DP_TCL_NUM_RING_MAX_QCA6390, + .hal_params = &ath11k_hw_hal_params_qca6390, + .supports_dynamic_smps_6ghz = false, + .alloc_cacheable_memory = false, + .supports_rssi_stats = true, + .fw_wmi_diag_event = true, + .current_cc_support = true, + .dbr_debug_support = false, + .coex_isolation = false, + }, + { + .name = "qca206x hw2.1", + .hw_rev = ATH11K_HW_QCA206X_HW21, + .fw = { + .dir = "QCA206X/hw2.1", + .board_size = 256 * 1024, + .cal_offset = 128 * 1024, + }, + .max_radios = 3, + .bdf_addr = 0x4B0C0000, + .hw_ops = &wcn6855_ops, + .ring_mask = &ath11k_hw_ring_mask_qca6390, + .internal_sleep_clock = true, + .regs = &wcn6855_regs, + .qmi_service_ins_id = ATH11K_QMI_WLFW_SERVICE_INS_ID_V01_QCA6390, + .host_ce_config = ath11k_host_ce_config_qca6390, + .ce_count = 9, + .target_ce_config = ath11k_target_ce_config_wlan_qca6390, + .target_ce_count = 9, + .svc_to_ce_map = ath11k_target_service_to_ce_map_wlan_qca6390, + .svc_to_ce_map_len = 14, + .single_pdev_only = true, + .rxdma1_enable = false, + .num_rxmda_per_pdev = 2, + .rx_mac_buf_ring = true, + .vdev_start_delay = true, + .htt_peer_map_v2 = false, + + .spectral = { + .fft_sz = 0, + .fft_pad_sz = 0, + .summary_pad_sz = 0, + .fft_hdr_len = 0, + .max_fft_bins = 0, + }, + .interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_AP), .supports_monitor = false, @@ -457,6 +523,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .tx_ring_size = DP_TCL_DATA_RING_SIZE, .smp2p_wow_exit = 
false, .support_fw_mac_sequence = true, + .coex_isolation = false, }, { .name = "wcn6855 hw2.1", @@ -1573,6 +1640,18 @@ static void ath11k_core_pdev_destroy(struct ath11k_base *ab) ath11k_debugfs_pdev_destroy(ab); } +static int ath11k_core_config_coex_isolation(struct ath11k_base *ab) +{ + struct ath11k *ar = ath11k_ab_to_ar(ab, 0); + struct wmi_coex_config_params param; + + memset(¶m, 0, sizeof(struct wmi_coex_config_params)); + param.config_type = WMI_COEX_CONFIG_ANTENNA_ISOLATION; + param.config_arg1 = WMI_COEX_ISOLATION_ARG1_DEFAUT; + + return ath11k_wmi_send_coex_config(ar, ¶m); +} + static int ath11k_core_start(struct ath11k_base *ab) { int ret; @@ -1670,6 +1749,15 @@ static int ath11k_core_start(struct ath11k_base *ab) goto err_reo_cleanup; } + if (ab->hw_params.coex_isolation) { + ret = ath11k_core_config_coex_isolation(ab); + if (ret) { + ath11k_err(ab, "failed to set coex isolation: %d\n", + ret); + goto err_reo_cleanup; + } + } + return 0; err_reo_cleanup: diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index cd829ec70d76..ad0c86384c5b 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -147,6 +147,7 @@ enum ath11k_hw_rev { ATH11K_HW_WCN6855_HW21, ATH11K_HW_WCN6750_HW10, ATH11K_HW_IPQ5018_HW10, + ATH11K_HW_QCA206X_HW21, }; enum ath11k_firmware_mode { diff --git a/drivers/net/wireless/ath/ath11k/hw.c b/drivers/net/wireless/ath/ath11k/hw.c index 77d8f9237680..409eb565d3f6 100644 --- a/drivers/net/wireless/ath/ath11k/hw.c +++ b/drivers/net/wireless/ath/ath11k/hw.c @@ -100,6 +100,7 @@ static void ath11k_init_wmi_config_qca6390(struct ath11k_base *ab, config->num_wow_filters = 0x16; config->num_keep_alive_pattern = 0; config->flag1 |= WMI_RSRC_CFG_FLAG1_BSS_CHANNEL_INFO_64; + config->host_service_flags |= WMI_RSRC_CFG_HOST_SERVICE_FLAG_NAN_IFACE_SUPPORT; } static void ath11k_hw_ipq8074_reo_setup(struct ath11k_base *ab) diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h index 1b070747a5db..20fb19e3d82a 100644 --- a/drivers/net/wireless/ath/ath11k/hw.h +++ b/drivers/net/wireless/ath/ath11k/hw.h @@ -206,6 +206,7 @@ struct ath11k_hw_params { bool fw_wmi_diag_event; bool current_cc_support; bool dbr_debug_support; + bool coex_isolation; bool global_reset; const struct cfg80211_sar_capa *bios_sar_capa; bool m3_fw_support; diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index cc80310088ce..c8409c00af2e 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -2474,7 +2474,7 @@ static void ath11k_peer_assoc_h_he(struct ath11k *ar, he_mcs_mask[i]) max_nss = i + 1; } - arg->peer_nss = min(sta->deflink.rx_nss, max_nss); + arg->peer_nss = min(arg->peer_nss, (u32)max_nss); if (arg->peer_phymode == MODE_11AX_HE160 || arg->peer_phymode == MODE_11AX_HE80_80) { @@ -4713,6 +4713,7 @@ static void ath11k_sta_rc_update_wk(struct work_struct *wk) nss = max_t(u32, 1, nss); nss = min(nss, ath11k_mac_max_nss(ht_mcs_mask, vht_mcs_mask, he_mcs_mask)); + nss = min(nss, (u32)ar->num_tx_chains); if (changed & IEEE80211_RC_BW_CHANGED) { /* Get the peer phymode */ @@ -9293,6 +9294,31 @@ static int ath11k_mac_setup_channels_rates(struct ath11k *ar, return 0; } +static void ath11k_mac_setup_mac_address_list(struct ath11k *ar) +{ + struct mac_address *addresses; + u16 n_addresses; + int i; + + if (!ar->ab->hw_params.single_pdev_only || ar->ab->hw_params.num_rxmda_per_pdev < 2) + return; + + n_addresses = 3; 
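+	/*
+	 * The extra addresses are derived from the base MAC by setting the
+	 * locally-administered bit and bumping the high nibble of byte 0,
+	 * e.g. (illustrative) 00:11:22:33:44:55 yields 02:11:22:33:44:55
+	 * and 12:11:22:33:44:55.
+	 */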
+ addresses = kcalloc(n_addresses, sizeof(*addresses), GFP_KERNEL); + if (!addresses) + return; + + memcpy(addresses[0].addr, ar->mac_addr, ETH_ALEN); + for (i = 1; i < n_addresses; i++) { + memcpy(addresses[i].addr, ar->mac_addr, ETH_ALEN); + addresses[i].addr[0] |= 0x2; + addresses[i].addr[0] += (i - 1) << 4; + } + + ar->hw->wiphy->addresses = addresses; + ar->hw->wiphy->n_addresses = n_addresses; +} + static int ath11k_mac_setup_iface_combinations(struct ath11k *ar) { struct ath11k_base *ab = ar->ab; @@ -9312,28 +9338,43 @@ static int ath11k_mac_setup_iface_combinations(struct ath11k *ar) return -ENOMEM; } - limits[0].max = 1; - limits[0].types |= BIT(NL80211_IFTYPE_STATION); + if (ab->hw_params.single_pdev_only && ar->ab->hw_params.num_rxmda_per_pdev > 1) { + limits[0].max = 2; + limits[0].types |= BIT(NL80211_IFTYPE_STATION); - limits[1].max = 16; - limits[1].types |= BIT(NL80211_IFTYPE_AP); + limits[1].max = 1; + limits[1].types |= BIT(NL80211_IFTYPE_AP); - if (IS_ENABLED(CONFIG_MAC80211_MESH) && - ab->hw_params.interface_modes & BIT(NL80211_IFTYPE_MESH_POINT)) - limits[1].types |= BIT(NL80211_IFTYPE_MESH_POINT); - - combinations[0].limits = limits; - combinations[0].n_limits = n_limits; - combinations[0].max_interfaces = 16; - combinations[0].num_different_channels = 1; - combinations[0].beacon_int_infra_match = true; - combinations[0].beacon_int_min_gcd = 100; - combinations[0].radar_detect_widths = BIT(NL80211_CHAN_WIDTH_20_NOHT) | - BIT(NL80211_CHAN_WIDTH_20) | - BIT(NL80211_CHAN_WIDTH_40) | - BIT(NL80211_CHAN_WIDTH_80) | - BIT(NL80211_CHAN_WIDTH_80P80) | - BIT(NL80211_CHAN_WIDTH_160); + combinations[0].limits = limits; + combinations[0].n_limits = 2; + combinations[0].max_interfaces = 3; + combinations[0].num_different_channels = 2; + combinations[0].beacon_int_infra_match = true; + combinations[0].beacon_int_min_gcd = 100; + } else { + limits[0].max = 1; + limits[0].types |= BIT(NL80211_IFTYPE_STATION); + + limits[1].max = 16; + limits[1].types |= BIT(NL80211_IFTYPE_AP); + + if (IS_ENABLED(CONFIG_MAC80211_MESH) && + ab->hw_params.interface_modes & BIT(NL80211_IFTYPE_MESH_POINT)) + limits[1].types |= BIT(NL80211_IFTYPE_MESH_POINT); + + combinations[0].limits = limits; + combinations[0].n_limits = 2; + combinations[0].max_interfaces = 16; + combinations[0].num_different_channels = 1; + combinations[0].beacon_int_infra_match = true; + combinations[0].beacon_int_min_gcd = 100; + combinations[0].radar_detect_widths = BIT(NL80211_CHAN_WIDTH_20_NOHT) | + BIT(NL80211_CHAN_WIDTH_20) | + BIT(NL80211_CHAN_WIDTH_40) | + BIT(NL80211_CHAN_WIDTH_80) | + BIT(NL80211_CHAN_WIDTH_80P80) | + BIT(NL80211_CHAN_WIDTH_160); + } ar->hw->wiphy->iface_combinations = combinations; ar->hw->wiphy->n_iface_combinations = 1; @@ -9398,6 +9439,8 @@ static void __ath11k_mac_unregister(struct ath11k *ar) kfree(ar->hw->wiphy->iface_combinations[0].limits); kfree(ar->hw->wiphy->iface_combinations); + kfree(ar->hw->wiphy->addresses); + SET_IEEE80211_DEV(ar->hw, NULL); } @@ -9440,6 +9483,7 @@ static int __ath11k_mac_register(struct ath11k *ar) ath11k_pdev_caps_update(ar); SET_IEEE80211_PERM_ADDR(ar->hw, ar->mac_addr); + ath11k_mac_setup_mac_address_list(ar); SET_IEEE80211_DEV(ar->hw, ab->dev); diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c index 337590236485..583286ff7987 100644 --- a/drivers/net/wireless/ath/ath11k/mhi.c +++ b/drivers/net/wireless/ath/ath11k/mhi.c @@ -443,6 +443,7 @@ int ath11k_mhi_register(struct ath11k_pci *ab_pci) case ATH11K_HW_QCA6390_HW20: case 
ATH11K_HW_WCN6855_HW20: case ATH11K_HW_WCN6855_HW21: + case ATH11K_HW_QCA206X_HW21: ath11k_mhi_config = &ath11k_mhi_config_qca6390; break; default: diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c index 09e65c5e55c4..88f11f6e47b7 100644 --- a/drivers/net/wireless/ath/ath11k/pci.c +++ b/drivers/net/wireless/ath/ath11k/pci.c @@ -28,6 +28,8 @@ #define QCN9074_DEVICE_ID 0x1104 #define WCN6855_DEVICE_ID 0x1103 +#define SUB_VERSION 0x1910010 + static const struct pci_device_id ath11k_pci_id_table[] = { { PCI_VDEVICE(QCOM, QCA6390_DEVICE_ID) }, { PCI_VDEVICE(QCOM, WCN6855_DEVICE_ID) }, @@ -108,7 +110,12 @@ static u32 ath11k_pci_window_read32(struct ath11k_base *ab, u32 offset) struct ath11k_pci *ab_pci = ath11k_pci_priv(ab); u32 window_start, val; - window_start = ath11k_pci_get_window_start(ab, offset); + if (ab->hw_params.static_window_map) + window_start = ath11k_pci_get_window_start(ab, offset); + else + window_start = ATH11K_PCI_WINDOW_START; + + //window_start = ath11k_pci_get_window_start(ab, offset); if (window_start == ATH11K_PCI_WINDOW_START) { spin_lock_bh(&ab_pci->window_lock); @@ -733,6 +740,8 @@ static int ath11k_pci_probe(struct pci_dev *pdev, u32 soc_hw_version_major, soc_hw_version_minor, addr; const struct ath11k_pci_ops *pci_ops; int ret; + u32 sub_version; + int ops_init = 0; ab = ath11k_core_alloc(&pdev->dev, sizeof(*ab_pci), ATH11K_BUS_PCI); @@ -779,8 +788,8 @@ static int ath11k_pci_probe(struct pci_dev *pdev, case QCA6390_DEVICE_ID: ath11k_pci_read_hw_version(ab, &soc_hw_version_major, &soc_hw_version_minor); - switch (soc_hw_version_major) { - case 2: + switch (soc_hw_version_major) { + case 2: ab->hw_rev = ATH11K_HW_QCA6390_HW20; break; default: @@ -800,6 +809,13 @@ static int ath11k_pci_probe(struct pci_dev *pdev, ab->id.bdf_search = ATH11K_BDF_SEARCH_BUS_AND_BOARD; ath11k_pci_read_hw_version(ab, &soc_hw_version_major, &soc_hw_version_minor); + pci_ops = &ath11k_pci_ops_qca6390; + ret = ath11k_pcic_register_pci_ops(ab, pci_ops); + if (ret) { + ath11k_err(ab, "failed to register PCI ops: %d\n", ret); + goto err_pci_free_region; + } + ops_init = 1; switch (soc_hw_version_major) { case 2: switch (soc_hw_version_minor) { @@ -809,7 +825,19 @@ static int ath11k_pci_probe(struct pci_dev *pdev, break; case 0x10: case 0x11: - ab->hw_rev = ATH11K_HW_WCN6855_HW21; + //ab->hw_rev = ATH11K_HW_WCN6855_HW21; + sub_version = ath11k_pcic_read32(ab, SUB_VERSION); + ath11k_dbg(ab, ATH11K_DBG_PCI, "sub_version 0x%x\n", sub_version); + switch (sub_version) { + case 0x1019A0E1: + case 0x1019B0E1: + case 0x1019C0E1: + case 0x1019D0E1: + ab->hw_rev = ATH11K_HW_QCA206X_HW21; + break; + default: + ab->hw_rev = ATH11K_HW_WCN6855_HW21; + } break; default: goto unsupported_wcn6855_soc; @@ -823,7 +851,6 @@ static int ath11k_pci_probe(struct pci_dev *pdev, goto err_pci_free_region; } - pci_ops = &ath11k_pci_ops_qca6390; break; default: dev_err(&pdev->dev, "Unknown PCI device found: 0x%x\n", @@ -832,11 +859,13 @@ static int ath11k_pci_probe(struct pci_dev *pdev, goto err_pci_free_region; } - ret = ath11k_pcic_register_pci_ops(ab, pci_ops); - if (ret) { - ath11k_err(ab, "failed to register PCI ops: %d\n", ret); - goto err_pci_free_region; - } + if(ops_init == 1){ + ret = ath11k_pcic_register_pci_ops(ab, pci_ops); + if (ret) { + ath11k_err(ab, "failed to register PCI ops: %d\n", ret); + goto err_pci_free_region; + } + } ret = ath11k_pcic_init_msi_config(ab); if (ret) { @@ -1021,6 +1050,7 @@ static struct pci_driver ath11k_pci_driver = { static int 
ath11k_pci_init(void) { int ret; + u32 sub_version; ret = pci_register_driver(&ath11k_pci_driver); if (ret) diff --git a/drivers/net/wireless/ath/ath11k/pcic.c b/drivers/net/wireless/ath/ath11k/pcic.c index 15e2ceb22a44..c7b4b897d946 100644 --- a/drivers/net/wireless/ath/ath11k/pcic.c +++ b/drivers/net/wireless/ath/ath11k/pcic.c @@ -115,6 +115,17 @@ static const struct ath11k_msi_config ath11k_msi_config[] = { }, .hw_rev = ATH11K_HW_WCN6750_HW10, }, + { + .total_vectors = 32, + .total_users = 4, + .users = (struct ath11k_msi_user[]) { + { .name = "MHI", .num_vectors = 3, .base_vector = 0 }, + { .name = "CE", .num_vectors = 10, .base_vector = 3 }, + { .name = "WAKE", .num_vectors = 1, .base_vector = 13 }, + { .name = "DP", .num_vectors = 18, .base_vector = 14 }, + }, + .hw_rev = ATH11K_HW_QCA206X_HW21, + }, }; int ath11k_pcic_init_msi_config(struct ath11k_base *ab) diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 442afda7ec88..2abf307a793f 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -4069,6 +4069,7 @@ ath11k_wmi_copy_resource_config(struct wmi_resource_config *wmi_cfg, wmi_cfg->sched_params = tg_cfg->sched_params; wmi_cfg->twt_ap_pdev_count = tg_cfg->twt_ap_pdev_count; wmi_cfg->twt_ap_sta_count = tg_cfg->twt_ap_sta_count; + wmi_cfg->host_service_flags = tg_cfg->host_service_flags; wmi_cfg->host_service_flags &= ~(1 << WMI_CFG_HOST_SERVICE_FLAG_REG_CC_EXT); wmi_cfg->host_service_flags |= (tg_cfg->is_reg_cc_ext_event_supported << @@ -9688,3 +9689,29 @@ int ath11k_wmi_sta_keepalive(struct ath11k *ar, return ath11k_wmi_cmd_send(wmi, skb, WMI_STA_KEEPALIVE_CMDID); } + +int ath11k_wmi_send_coex_config(struct ath11k *ar, + struct wmi_coex_config_params *param) +{ + struct ath11k_pdev_wmi *wmi = ar->wmi; + struct wmi_coex_config_cmd *cmd; + struct sk_buff *skb; + + skb = ath11k_wmi_alloc_skb(wmi->wmi_ab, sizeof(*cmd)); + if (!skb) + return -ENOMEM; + + cmd = (struct wmi_coex_config_cmd *)skb->data; + cmd->tlv_header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_COEX_CONFIG_CMD) | + FIELD_PREP(WMI_TLV_LEN, sizeof(*cmd) - TLV_HDR_SIZE); + cmd->vdev_id = param->vdev_id; + cmd->config_type = param->config_type; + cmd->config_arg1 = param->config_arg1; + cmd->config_arg2 = param->config_arg2; + cmd->config_arg3 = param->config_arg3; + cmd->config_arg4 = param->config_arg4; + cmd->config_arg5 = param->config_arg5; + cmd->config_arg6 = param->config_arg6; + + return ath11k_wmi_cmd_send(wmi, skb, WMI_COEX_CONFIG_CMDID); +} diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h index cd2098d78e86..7cfe0c0f522f 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.h +++ b/drivers/net/wireless/ath/ath11k/wmi.h @@ -2332,6 +2332,7 @@ struct wmi_init_cmd { #define WMI_RSRC_CFG_FLAG1_BSS_CHANNEL_INFO_64 BIT(5) #define WMI_RSRC_CFG_FLAG2_CALC_NEXT_DTIM_COUNT_SET BIT(9) #define WMI_RSRC_CFG_FLAG1_ACK_RSSI BIT(18) +#define WMI_RSRC_CFG_HOST_SERVICE_FLAG_NAN_IFACE_SUPPORT BIT(0) #define WMI_CFG_HOST_SERVICE_FLAG_REG_CC_EXT 4 @@ -5677,6 +5678,15 @@ struct target_resource_config { u8 is_reg_cc_ext_event_supported; u32 ema_max_vap_cnt; u32 ema_max_profile_period; + u32 max_nlo_ssids; + u32 num_packet_filters; + u32 num_max_sta_vdevs; + u32 max_bssid_indicator; + u32 ul_resp_config; + u32 msdu_flow_override_config0; + u32 msdu_flow_override_config1; + u32 flags2; + u32 host_service_flags; }; enum wmi_debug_log_param { @@ -6298,6 +6308,82 @@ enum wmi_sta_keepalive_method { const void 
**ath11k_wmi_tlv_parse_alloc(struct ath11k_base *ab, const void *ptr, size_t len, gfp_t gfp); +enum wmi_coex_config_type { + WMI_COEX_CONFIG_PAGE_P2P_TDM = 1, + WMI_COEX_CONFIG_PAGE_STA_TDM = 2, + WMI_COEX_CONFIG_PAGE_SAP_TDM = 3, + WMI_COEX_CONFIG_DURING_WLAN_CONN = 4, + WMI_COEX_CONFIG_BTC_ENABLE = 5, + WMI_COEX_CONFIG_COEX_DBG = 6, + WMI_COEX_CONFIG_PAGE_P2P_STA_TDM = 7, + WMI_COEX_CONFIG_INQUIRY_P2P_TDM = 8, + WMI_COEX_CONFIG_INQUIRY_STA_TDM = 9, + WMI_COEX_CONFIG_INQUIRY_SAP_TDM = 10, + WMI_COEX_CONFIG_INQUIRY_P2P_STA_TDM = 11, + WMI_COEX_CONFIG_TX_POWER = 12, + WMI_COEX_CONFIG_PTA_CONFIG = 13, + WMI_COEX_CONFIG_AP_TDM = 14, + WMI_COEX_CONFIG_WLAN_SCAN_PRIORITY = 15, + WMI_COEX_CONFIG_WLAN_PKT_PRIORITY = 16, + WMI_COEX_CONFIG_PTA_INTERFACE = 17, + WMI_COEX_CONFIG_BTC_DUTYCYCLE = 18, + WMI_COEX_CONFIG_HANDOVER_RSSI = 19, + WMI_COEX_CONFIG_PTA_BT_INFO = 20, + WMI_COEX_CONFIG_SINK_WLAN_TDM = 21, + WMI_COEX_CONFIG_COEX_ENABLE_MCC_TDM = 22, + WMI_COEX_CONFIG_LOWRSSI_A2DPOPP_TDM = 23, + WMI_COEX_CONFIG_BTC_MODE = 24, + WMI_COEX_CONFIG_ANTENNA_ISOLATION = 25, + WMI_COEX_CONFIG_BT_LOW_RSSI_THRESHOLD = 26, + WMI_COEX_CONFIG_BT_INTERFERENCE_LEVEL = 27, + WMI_COEX_CONFIG_WLAN_OVER_ZBLOW = 28, + WMI_COEX_CONFIG_WLAN_MGMT_OVER_BT_A2DP = 29, + WMI_COEX_CONFIG_WLAN_CONN_OVER_LE = 30, + WMI_COEX_CONFIG_LE_OVER_WLAN_TRAFFIC = 31, + WMI_COEX_CONFIG_THREE_WAY_COEX_RESET = 32, + WMI_COEX_CONFIG_THREE_WAY_DELAY_PARA = 33, + WMI_COEX_CONFIG_THREE_WAY_COEX_START = 34, + WMI_COEX_CONFIG_MPTA_HELPER_ENABLE = 35, + WMI_COEX_CONFIG_MPTA_HELPER_ZIGBEE_STATE = 36, + WMI_COEX_CONFIG_MPTA_HELPER_INT_OCS_PARAMS = 37, + WMI_COEX_CONFIG_MPTA_HELPER_MON_OCS_PARAMS = 38, + WMI_COEX_CONFIG_MPTA_HELPER_INT_MON_DURATION = 39, + WMI_COEX_CONFIG_MPTA_HELPER_ZIGBEE_CHANNEL = 40, + WMI_COEX_CONFIG_MPTA_HELPER_WLAN_MUTE_DURATION = 41, + WMI_COEX_CONFIG_BT_SCO_ALLOW_WLAN_2G_SCAN = 42, + WMI_COEX_CONFIG_ENABLE_2ND_HARMONIC_WAR = 43, + WMI_COEX_CONFIG_BTCOEX_SEPARATE_CHAIN_MODE = 44, + WMI_COEX_CONFIG_ENABLE_TPUT_SHAPING = 45, + WMI_COEX_CONFIG_ENABLE_TXBF = 46, + WMI_COEX_CONFIG_FORCED_ALGO = 47, + WMI_COEX_CONFIG_LE_SCAN_POLICY = 48, +}; + +struct wmi_coex_config_params { + u32 vdev_id; + u32 config_type; + u32 config_arg1; + u32 config_arg2; + u32 config_arg3; + u32 config_arg4; + u32 config_arg5; + u32 config_arg6; +}; + +struct wmi_coex_config_cmd { + u32 tlv_header; + u32 vdev_id; + u32 config_type; + u32 config_arg1; + u32 config_arg2; + u32 config_arg3; + u32 config_arg4; + u32 config_arg5; + u32 config_arg6; +} __packed; + +#define WMI_COEX_ISOLATION_ARG1_DEFAUT 30 + int ath11k_wmi_cmd_send(struct ath11k_pdev_wmi *wmi, struct sk_buff *skb, u32 cmd_id); struct sk_buff *ath11k_wmi_alloc_skb(struct ath11k_wmi_base *wmi_sc, u32 len); @@ -6459,6 +6545,7 @@ int ath11k_wmi_scan_prob_req_oui(struct ath11k *ar, const u8 mac_addr[ETH_ALEN]); int ath11k_wmi_fw_dbglog_cfg(struct ath11k *ar, u32 *module_id_bitmap, struct ath11k_fw_dbglog *dbglog); +int ath11k_wmi_send_coex_config(struct ath11k *ar, struct wmi_coex_config_params *param); int ath11k_wmi_wow_config_pno(struct ath11k *ar, u32 vdev_id, struct wmi_pno_scan_req *pno_scan); int ath11k_wmi_wow_del_pattern(struct ath11k *ar, u32 vdev_id, u32 pattern_id); diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile index f2b19e6174af..4fef4b174321 100644 --- a/drivers/pci/controller/Makefile +++ b/drivers/pci/controller/Makefile @@ -1,4 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 +ifdef CONFIG_X86_64 +ifdef CONFIG_SATA_AHCI +obj-y += intel-nvme-remap.o +endif 
+endif + obj-$(CONFIG_PCIE_CADENCE) += cadence/ obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o obj-$(CONFIG_PCI_IXP4XX) += pci-ixp4xx.o diff --git a/drivers/pci/controller/intel-nvme-remap.c b/drivers/pci/controller/intel-nvme-remap.c new file mode 100644 index 000000000000..e105e6f5cc91 --- /dev/null +++ b/drivers/pci/controller/intel-nvme-remap.c @@ -0,0 +1,462 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel remapped NVMe device support. + * + * Copyright (c) 2019 Endless Mobile, Inc. + * Author: Daniel Drake + * + * Some products ship by default with the SATA controller in "RAID" or + * "Intel RST Premium With Intel Optane System Acceleration" mode. Under this + * mode, which we refer to as "remapped NVMe" mode, any installed NVMe + * devices disappear from the PCI bus, and instead their I/O memory becomes + * available within the AHCI device BARs. + * + * This scheme is understood to be a way of avoiding usage of the standard + * Windows NVMe driver under that OS, instead mandating usage of Intel's + * driver instead, which has better power management, and presumably offers + * some RAID/disk-caching solutions too. + * + * Here in this driver, we support the remapped NVMe mode by claiming the + * AHCI device and creating a fake PCIe root port. On the new bus, the + * original AHCI device is exposed with only minor tweaks. Then, fake PCI + * devices corresponding to the remapped NVMe devices are created. The usual + * ahci and nvme drivers are then expected to bind to these devices and + * operate as normal. + * + * The PCI configuration space for the NVMe devices is completely + * unavailable, so we fake a minimal one and hope for the best. + * + * Interrupts are shared between the AHCI and NVMe devices. For simplicity, + * we only support the legacy interrupt here, although MSI support + * could potentially be added later. + */ + +#define MODULE_NAME "intel-nvme-remap" + +#include +#include +#include +#include +#include + +#define AHCI_PCI_BAR_STANDARD 5 + +struct nvme_remap_dev { + struct pci_dev *dev; /* AHCI device */ + struct pci_bus *bus; /* our fake PCI bus */ + struct pci_sysdata sysdata; + int irq_base; /* our fake interrupts */ + + /* + * When we detect an all-ones write to a BAR register, this flag + * is set, so that we return the BAR size on the next read (a + * standard PCI behaviour). + * This includes the assumption that an all-ones BAR write is + * immediately followed by a read of the same register. + */ + bool bar_sizing; + + /* + * Resources copied from the AHCI device, to be regarded as + * resources on our fake bus. + */ + struct resource ahci_resources[PCI_NUM_RESOURCES]; + + /* Resources corresponding to the NVMe devices. */ + struct resource remapped_dev_mem[AHCI_MAX_REMAP]; + + /* Number of remapped NVMe devices found. */ + int num_remapped_devices; +}; + +static inline struct nvme_remap_dev *nrdev_from_bus(struct pci_bus *bus) +{ + return container_of(bus->sysdata, struct nvme_remap_dev, sysdata); +} + + +/******** PCI configuration space **********/ + +/* + * Helper macros for tweaking returned contents of PCI configuration space. + * + * value contains len bytes of data read from reg. + * If fixup_reg is included in that range, fix up the contents of that + * register to fixed_value. 
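+ *
+ * For example (an illustrative case): a 4-byte read at reg 0 combined
+ * with NR_FIX16(PCI_VENDOR_ID, PCI_VENDOR_ID_INTEL) rewrites bytes 0
+ * and 1 of *value to 0x86 and 0x80 and leaves the device ID bytes
+ * untouched.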
+ */ +#define NR_FIX8(fixup_reg, fixed_value) do { \ + if (reg <= fixup_reg && fixup_reg < reg + len) \ + ((u8 *) value)[fixup_reg - reg] = (u8) (fixed_value); \ + } while (0) + +#define NR_FIX16(fixup_reg, fixed_value) do { \ + NR_FIX8(fixup_reg, fixed_value); \ + NR_FIX8(fixup_reg + 1, fixed_value >> 8); \ + } while (0) + +#define NR_FIX24(fixup_reg, fixed_value) do { \ + NR_FIX8(fixup_reg, fixed_value); \ + NR_FIX8(fixup_reg + 1, fixed_value >> 8); \ + NR_FIX8(fixup_reg + 2, fixed_value >> 16); \ + } while (0) + +#define NR_FIX32(fixup_reg, fixed_value) do { \ + NR_FIX16(fixup_reg, (u16) fixed_value); \ + NR_FIX16(fixup_reg + 2, fixed_value >> 16); \ + } while (0) + +/* + * Read PCI config space of the slot 0 (AHCI) device. + * We pass through the read request to the underlying device, but + * tweak the results in some cases. + */ +static int nvme_remap_pci_read_slot0(struct pci_bus *bus, int reg, + int len, u32 *value) +{ + struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); + struct pci_bus *ahci_dev_bus = nrdev->dev->bus; + int ret; + + ret = ahci_dev_bus->ops->read(ahci_dev_bus, nrdev->dev->devfn, + reg, len, value); + if (ret) + return ret; + + /* + * Adjust the device class, to prevent this driver from attempting to + * additionally probe the device we're simulating here. + */ + NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_SATA_AHCI); + + /* + * Unset interrupt pin, otherwise ACPI tries to find routing + * info for our virtual IRQ, fails, and complains. + */ + NR_FIX8(PCI_INTERRUPT_PIN, 0); + + /* + * Truncate the AHCI BAR to not include the region that covers the + * hidden devices. This will cause the ahci driver to successfully + * probe th new device (instead of handing it over to this driver). + */ + if (nrdev->bar_sizing) { + NR_FIX32(PCI_BASE_ADDRESS_5, ~(SZ_16K - 1)); + nrdev->bar_sizing = false; + } + + return PCIBIOS_SUCCESSFUL; +} + +/* + * Read PCI config space of a remapped device. + * Since the original PCI config space is inaccessible, we provide a minimal, + * fake config space instead. + */ +static int nvme_remap_pci_read_remapped(struct pci_bus *bus, unsigned int port, + int reg, int len, u32 *value) +{ + struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); + struct resource *remapped_mem; + + if (port > nrdev->num_remapped_devices) + return PCIBIOS_DEVICE_NOT_FOUND; + + *value = 0; + remapped_mem = &nrdev->remapped_dev_mem[port - 1]; + + /* Set a Vendor ID, otherwise Linux assumes no device is present */ + NR_FIX16(PCI_VENDOR_ID, PCI_VENDOR_ID_INTEL); + + /* Always appear on & bus mastering */ + NR_FIX16(PCI_COMMAND, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + + /* Set class so that nvme driver probes us */ + NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_EXPRESS); + + if (nrdev->bar_sizing) { + NR_FIX32(PCI_BASE_ADDRESS_0, + ~(resource_size(remapped_mem) - 1)); + nrdev->bar_sizing = false; + } else { + resource_size_t mem_start = remapped_mem->start; + + mem_start |= PCI_BASE_ADDRESS_MEM_TYPE_64; + NR_FIX32(PCI_BASE_ADDRESS_0, mem_start); + mem_start >>= 32; + NR_FIX32(PCI_BASE_ADDRESS_1, mem_start); + } + + return PCIBIOS_SUCCESSFUL; +} + +/* Read PCI configuration space. */ +static int nvme_remap_pci_read(struct pci_bus *bus, unsigned int devfn, + int reg, int len, u32 *value) +{ + if (PCI_SLOT(devfn) == 0) + return nvme_remap_pci_read_slot0(bus, reg, len, value); + else + return nvme_remap_pci_read_remapped(bus, PCI_SLOT(devfn), + reg, len, value); +} + +/* + * Write PCI config space of the slot 0 (AHCI) device. 
+ * Apart from the special case of BAR sizing, we disable all writes. + * Otherwise, the ahci driver could make changes (e.g. unset PCI bus master) + * that would affect the operation of the NVMe devices. + */ +static int nvme_remap_pci_write_slot0(struct pci_bus *bus, int reg, + int len, u32 value) +{ + struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); + struct pci_bus *ahci_dev_bus = nrdev->dev->bus; + + if (reg >= PCI_BASE_ADDRESS_0 && reg <= PCI_BASE_ADDRESS_5) { + /* + * Writing all-ones to a BAR means that the size of the + * memory region is being checked. Flag this so that we can + * reply with an appropriate size on the next read. + */ + if (value == ~0) + nrdev->bar_sizing = true; + + return ahci_dev_bus->ops->write(ahci_dev_bus, + nrdev->dev->devfn, + reg, len, value); + } + + return PCIBIOS_SET_FAILED; +} + +/* + * Write PCI config space of a remapped device. + * Since the original PCI config space is inaccessible, we reject all + * writes, except for the special case of BAR probing. + */ +static int nvme_remap_pci_write_remapped(struct pci_bus *bus, + unsigned int port, + int reg, int len, u32 value) +{ + struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); + + if (port > nrdev->num_remapped_devices) + return PCIBIOS_DEVICE_NOT_FOUND; + + /* + * Writing all-ones to a BAR means that the size of the memory + * region is being checked. Flag this so that we can reply with + * an appropriate size on the next read. + */ + if (value == ~0 && reg >= PCI_BASE_ADDRESS_0 + && reg <= PCI_BASE_ADDRESS_5) { + nrdev->bar_sizing = true; + return PCIBIOS_SUCCESSFUL; + } + + return PCIBIOS_SET_FAILED; +} + +/* Write PCI configuration space. */ +static int nvme_remap_pci_write(struct pci_bus *bus, unsigned int devfn, + int reg, int len, u32 value) +{ + if (PCI_SLOT(devfn) == 0) + return nvme_remap_pci_write_slot0(bus, reg, len, value); + else + return nvme_remap_pci_write_remapped(bus, PCI_SLOT(devfn), + reg, len, value); +} + +static struct pci_ops nvme_remap_pci_ops = { + .read = nvme_remap_pci_read, + .write = nvme_remap_pci_write, +}; + + +/******** Initialization & exit **********/ + +/* + * Find a PCI domain ID to use for our fake bus. + * Start at 0x10000 to not clash with ACPI _SEG domains (16 bits). + */ +static int find_free_domain(void) +{ + int domain = 0xffff; + struct pci_bus *bus = NULL; + + while ((bus = pci_find_next_bus(bus)) != NULL) + domain = max_t(int, domain, pci_domain_nr(bus)); + + return domain + 1; +} + +static int find_remapped_devices(struct nvme_remap_dev *nrdev, + struct list_head *resources) +{ + void __iomem *mmio; + int i, count = 0; + u32 cap; + + mmio = pcim_iomap(nrdev->dev, AHCI_PCI_BAR_STANDARD, + pci_resource_len(nrdev->dev, + AHCI_PCI_BAR_STANDARD)); + if (!mmio) + return -ENODEV; + + /* Check if this device might have remapped nvme devices. 
*/ + if (pci_resource_len(nrdev->dev, AHCI_PCI_BAR_STANDARD) < SZ_512K || + !(readl(mmio + AHCI_VSCAP) & 1)) + return -ENODEV; + + cap = readq(mmio + AHCI_REMAP_CAP); + for (i = AHCI_MAX_REMAP-1; i >= 0; i--) { + struct resource *remapped_mem; + + if ((cap & (1 << i)) == 0) + continue; + if (readl(mmio + ahci_remap_dcc(i)) + != PCI_CLASS_STORAGE_EXPRESS) + continue; + + /* We've found a remapped device */ + remapped_mem = &nrdev->remapped_dev_mem[count++]; + remapped_mem->start = + pci_resource_start(nrdev->dev, AHCI_PCI_BAR_STANDARD) + + ahci_remap_base(i); + remapped_mem->end = remapped_mem->start + + AHCI_REMAP_N_SIZE - 1; + remapped_mem->flags = IORESOURCE_MEM | IORESOURCE_PCI_FIXED; + pci_add_resource(resources, remapped_mem); + } + + pcim_iounmap(nrdev->dev, mmio); + + if (count == 0) + return -ENODEV; + + nrdev->num_remapped_devices = count; + dev_info(&nrdev->dev->dev, "Found %d remapped NVMe devices\n", + nrdev->num_remapped_devices); + return 0; +} + +static void nvme_remap_remove_root_bus(void *data) +{ + struct pci_bus *bus = data; + + pci_stop_root_bus(bus); + pci_remove_root_bus(bus); +} + +static int nvme_remap_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + struct nvme_remap_dev *nrdev; + LIST_HEAD(resources); + int i; + int ret; + struct pci_dev *child; + + nrdev = devm_kzalloc(&dev->dev, sizeof(*nrdev), GFP_KERNEL); + nrdev->sysdata.domain = find_free_domain(); + nrdev->sysdata.nvme_remap_dev = dev; + nrdev->dev = dev; + pci_set_drvdata(dev, nrdev); + + ret = pcim_enable_device(dev); + if (ret < 0) + return ret; + + pci_set_master(dev); + + ret = find_remapped_devices(nrdev, &resources); + if (ret) + return ret; + + /* Add resources from the original AHCI device */ + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &dev->resource[i]; + + if (res->start) { + struct resource *nr_res = &nrdev->ahci_resources[i]; + + nr_res->start = res->start; + nr_res->end = res->end; + nr_res->flags = res->flags; + pci_add_resource(&resources, nr_res); + } + } + + /* Create virtual interrupts */ + nrdev->irq_base = devm_irq_alloc_descs(&dev->dev, -1, 0, + nrdev->num_remapped_devices + 1, + 0); + if (nrdev->irq_base < 0) + return nrdev->irq_base; + + /* Create and populate PCI bus */ + nrdev->bus = pci_create_root_bus(&dev->dev, 0, &nvme_remap_pci_ops, + &nrdev->sysdata, &resources); + if (!nrdev->bus) + return -ENODEV; + + if (devm_add_action_or_reset(&dev->dev, nvme_remap_remove_root_bus, + nrdev->bus)) + return -ENOMEM; + + /* We don't support sharing MSI interrupts between these devices */ + nrdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_MSI; + + pci_scan_child_bus(nrdev->bus); + + list_for_each_entry(child, &nrdev->bus->devices, bus_list) { + /* + * Prevent PCI core from trying to move memory BARs around. + * The hidden NVMe devices are at fixed locations. + */ + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &child->resource[i]; + + if (res->flags & IORESOURCE_MEM) + res->flags |= IORESOURCE_PCI_FIXED; + } + + /* Share the legacy IRQ between all devices */ + child->irq = dev->irq; + } + + pci_assign_unassigned_bus_resources(nrdev->bus); + pci_bus_add_devices(nrdev->bus); + + return 0; +} + +static const struct pci_device_id nvme_remap_ids[] = { + /* + * Match all Intel RAID controllers. 
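One detail the config-space handlers rely on: nvme_remap_probe() above hands &nrdev->sysdata to pci_create_root_bus(), so the nrdev_from_bus() helper used by the read/write paths (defined earlier in this file) can presumably recover the driver state from bus->sysdata. A plausible, hedged sketch of that helper; the real definition may differ:

/* Plausible shape only: recover the driver state from the fake root bus. */
static struct nvme_remap_dev *nrdev_from_bus(struct pci_bus *bus)
{
	return container_of(bus->sysdata, struct nvme_remap_dev, sysdata);
}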
+ * + * There's overlap here with the set of devices detected by the ahci + * driver, but ahci will only successfully probe when there + * *aren't* any remapped NVMe devices, and this driver will only + * successfully probe when there *are* remapped NVMe devices that + * need handling. + */ + { + PCI_VDEVICE(INTEL, PCI_ANY_ID), + .class = PCI_CLASS_STORAGE_RAID << 8, + .class_mask = 0xffffff00, + }, + {0,} +}; +MODULE_DEVICE_TABLE(pci, nvme_remap_ids); + +static struct pci_driver nvme_remap_drv = { + .name = MODULE_NAME, + .id_table = nvme_remap_ids, + .probe = nvme_remap_probe, +}; +module_pci_driver(nvme_remap_drv); + +MODULE_AUTHOR("Daniel Drake "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index eff7f5df08e2..bd03ff857031 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -697,7 +697,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RS100, quirk_ati_ /* * In the AMD NL platform, this device ([1022:7912]) has a class code of * PCI_CLASS_SERIAL_USB_XHCI (0x0c0330), which means the xhci driver will - * claim it. The same applies on the VanGogh platform device ([1022:163a]). + * claim it. * * But the dwc3 driver is a more specific driver for this device, and we'd * prefer to use it instead of xhci. To prevent xhci from claiming the @@ -705,7 +705,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RS100, quirk_ati_ * defines as "USB device (not host controller)". The dwc3 driver can then * claim it based on its Vendor and Device ID. */ -static void quirk_amd_dwc_class(struct pci_dev *pdev) +static void quirk_amd_nl_class(struct pci_dev *pdev) { u32 class = pdev->class; @@ -718,9 +718,7 @@ static void quirk_amd_dwc_class(struct pci_dev *pdev) } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB, - quirk_amd_dwc_class); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VANGOGH_USB, - quirk_amd_dwc_class); + quirk_amd_nl_class); /* * Synopsys USB 3.x host HAPS platform has a class code of @@ -3732,6 +3730,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev) dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET; } +static bool acs_on_downstream; +static bool acs_on_multifunction; + +#define NUM_ACS_IDS 16 +struct acs_on_id { + unsigned short vendor; + unsigned short device; +}; +static struct acs_on_id acs_on_ids[NUM_ACS_IDS]; +static u8 max_acs_id; + +static __init int pcie_acs_override_setup(char *p) +{ + if (!p) + return -EINVAL; + + while (*p) { + if (!strncmp(p, "downstream", 10)) + acs_on_downstream = true; + if (!strncmp(p, "multifunction", 13)) + acs_on_multifunction = true; + if (!strncmp(p, "id:", 3)) { + char opt[5]; + int ret; + long val; + + if (max_acs_id >= NUM_ACS_IDS - 1) { + pr_warn("Out of PCIe ACS override slots (%d)\n", + NUM_ACS_IDS); + goto next; + } + + p += 3; + snprintf(opt, 5, "%s", p); + ret = kstrtol(opt, 16, &val); + if (ret) { + pr_warn("PCIe ACS ID parse error %d\n", ret); + goto next; + } + acs_on_ids[max_acs_id].vendor = val; + + p += strcspn(p, ":"); + if (*p != ':') { + pr_warn("PCIe ACS invalid ID\n"); + goto next; + } + + p++; + snprintf(opt, 5, "%s", p); + ret = kstrtol(opt, 16, &val); + if (ret) { + pr_warn("PCIe ACS ID parse error %d\n", ret); + goto next; + } + acs_on_ids[max_acs_id].device = val; + max_acs_id++; + } +next: + p += strcspn(p, ","); + if (*p == ',') + p++; + } + + if (acs_on_downstream || acs_on_multifunction || max_acs_id) + pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer 
DMA\n"); + + return 0; +} +early_param("pcie_acs_override", pcie_acs_override_setup); + +static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags) +{ + int i; + + /* Never override ACS for legacy devices or devices with ACS caps */ + if (!pci_is_pcie(dev) || + pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS)) + return -ENOTTY; + + for (i = 0; i < max_acs_id; i++) + if (acs_on_ids[i].vendor == dev->vendor && + acs_on_ids[i].device == dev->device) + return 1; + + switch (pci_pcie_type(dev)) { + case PCI_EXP_TYPE_DOWNSTREAM: + case PCI_EXP_TYPE_ROOT_PORT: + if (acs_on_downstream) + return 1; + break; + case PCI_EXP_TYPE_ENDPOINT: + case PCI_EXP_TYPE_UPSTREAM: + case PCI_EXP_TYPE_LEG_END: + case PCI_EXP_TYPE_RC_END: + if (acs_on_multifunction && dev->multifunction) + return 1; + } + + return -ENOTTY; +} /* * Some NVIDIA GPU devices do not work with bus reset, SBR needs to be * prevented for those affected devices. @@ -5143,6 +5241,7 @@ static const struct pci_dev_acs_enabled { { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, /* Wangxun nics */ { PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs }, + { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides }, { 0 } }; diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index bdd302274b9a..5e60ce87e056 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -645,6 +645,16 @@ config THINKPAD_LMI To compile this driver as a module, choose M here: the module will be called think-lmi. +config LEGION_LAPTOP + tristate "Lenovo Legion Laptop Extras" + depends on ACPI + depends on ACPI_WMI || ACPI_WMI = n + depends on HWMON || HWMON = n + select ACPI_PLATFORM_PROFILE + help + This is a driver for Lenovo Legion laptops and contains drivers for + hotkey, fan control, and power mode. + source "drivers/platform/x86/intel/Kconfig" config MSI_EC diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index 1de432e8861e..4938891378b8 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_LENOVO_YMC) += lenovo-ymc.o obj-$(CONFIG_SENSORS_HDAPS) += hdaps.o obj-$(CONFIG_THINKPAD_ACPI) += thinkpad_acpi.o obj-$(CONFIG_THINKPAD_LMI) += think-lmi.o +obj-$(CONFIG_LEGION_LAPTOP) += legion-laptop.o obj-$(CONFIG_YOGABOOK) += lenovo-yogabook.o # Intel diff --git a/drivers/platform/x86/legion-laptop.c b/drivers/platform/x86/legion-laptop.c new file mode 100644 index 000000000000..5ec0a518f685 --- /dev/null +++ b/drivers/platform/x86/legion-laptop.c @@ -0,0 +1,6089 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * legion-laptop.c - Extra Lenovo Legion laptop support, in + * particular for fan curve control and power mode. + * + * Copyright (C) 2022 johnfan + * + * + * This driver might work on other Lenovo Legion models. If you + * want to try it you can pass force=1 as argument + * to the module which will force it to load even when the DMI + * data doesn't match the model AND FIRMWARE. + * + * Support for other hardware of this model is already partially + * provided by the module ideapad-laptop. + * + * The development page for this driver is located at + * https://github.com/johnfanv2/LenovoLegionLinux + * + * This driver exports the files: + * - /sys/kernel/debug/legion/fancurve (ro) + * The fan curve stored in the firmware in the form of a + * human readable table. 
+ *
+ * - /sys/module/legion_laptop/drivers/platform\:legion/PNP0C09\:00/powermode (rw)
+ *   0: balanced mode (white)
+ *   1: performance mode (red)
+ *   2: quiet mode (blue)
+ *   ?: custom mode (pink)
+ *
+ *   NOTE: Writing to this will load the default fan curve from
+ *         the firmware for this mode, so the fan curve might
+ *         have to be reconfigured if needed.
+ *
+ * It implements the usual hwmon interface to monitor fan speed and temperature
+ * and allows setting the fan curve inside the firmware.
+ *
+ * - /sys/class/hwmon/X/fan1_input or /sys/class/hwmon/X/fan2_input (ro)
+ *   Current fan speed of fan1/fan2.
+ * - /sys/class/hwmon/X/temp1_input (ro)
+ * - /sys/class/hwmon/X/temp2_input (ro)
+ * - /sys/class/hwmon/X/temp3_input (ro)
+ *   Temperature (Celsius) of CPU, GPU, and IC used for fan control.
+ * - /sys/class/hwmon/X/pwmY_auto_pointZ_pwm (rw)
+ *   PWM (0-255) of the fan at the Y-level in the fan curve
+ * - /sys/class/hwmon/X/pwmY_auto_pointZ_temp (rw)
+ *   upper temperature of tempZ (CPU, GPU, or IC) at the Y-level in the fan curve
+ * - /sys/class/hwmon/X/pwmY_auto_pointZ_temp_hyst (rw)
+ *   hysteresis (CPU, GPU, or IC) at the Y-level in the fan curve. The lower
+ *   temperature of the level is the upper temperature minus the hysteresis
+ *
+ *
+ * Credits for reverse engineering the firmware to:
+ * - David Woodhouse: heavily inspired by lenovo_laptop.c
+ * - Luke Cama: Windows version "LegionFanControl"
+ * - SmokelessCPU: reverse engineering of custom registers in EC
+ *                 and communication method with EC via ports
+ * - 0x1F9F1: additional reverse engineering for complete fan curve
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("johnfan");
+MODULE_DESCRIPTION("Lenovo Legion laptop extras");
+
+static bool force;
+module_param(force, bool, 0440);
+MODULE_PARM_DESC(
+	force,
+	"Force loading this module even if model or BIOS does not match.");
+
+static bool ec_readonly;
+module_param(ec_readonly, bool, 0440);
+MODULE_PARM_DESC(
+	ec_readonly,
+	"Only read from embedded controller but do not write or change settings.");
+
+static bool enable_platformprofile = true;
+module_param(enable_platformprofile, bool, 0440);
+MODULE_PARM_DESC(
+	enable_platformprofile,
+	"Enable the platform profile sysfs API to read and write the power mode.");
+
+#define LEGIONFEATURES \
+	"fancurve powermode platformprofile platformprofilenotify minifancurve"
+
+// Size of fan curve stored in the embedded controller
+#define MAXFANCURVESIZE 10
+
+#define LEGION_DRVR_SHORTNAME "legion"
+#define LEGION_HWMON_NAME LEGION_DRVR_SHORTNAME "_hwmon"
+
+struct legion_private;
+
+/* =============================== */
+/* Embedded Controller Description */
+/* =============================== */
+
+/* The configuration and registers used to access the embedded controller
+ * depend on the version of the software on the embedded controller and on
+ * the BIOS/UEFI firmware.
+ *
+ * To control the fan curve in the embedded controller (EC) one has to
+ * write to its "RAM". There are different possibilities:
+ * - EC RAM is memory mapped (write to it with ioremap)
+ * - access EC RAM via port-mapped IO (outb/inb)
+ * - access EC RAM via ACPI methods. It is only possible to write
+ *   to part of it (first 0xFF bytes?)
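As an editorial illustration of the first (memory-mapped) route, and not part of the patch itself: reading one EC RAM byte looks roughly like the sketch below. The addresses are taken from the model tables further down (ramio_physical_start 0xFE00D400 with a 0x600-byte window that, judging by ecram_memoryio_init() later in the file, mirrors EC RAM from offset 0xC400); they differ per model and must never be hard-coded like this in real code.

/* Minimal sketch, assuming a 0x600-byte window at physical 0xFE00D400 that
 * mirrors EC RAM starting at EC offset 0xC400; use the per-model values. */
static int example_ec_read_byte(u16 ec_offset, u8 *val)
{
	void __iomem *win = ioremap(0xFE00D400, 0x600);

	if (!win)
		return -ENOMEM;
	*val = readb(win + (ec_offset - 0xC400));
	iounmap(win);
	return 0;
}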
+ * + * In later models the firmware directly exposes ACPI methods to + * set the fan curve directly, without writing to EC RAM. This + * is done inside the ACPI method. + */ + +/** + * Offsets for interesting values inside the EC RAM (0 = start of + * EC RAM) These might change depending on the software inside of + * the EC, which can be updated by a BIOS update from Lenovo. + */ +// TODO: same order as in initialization +struct ec_register_offsets { + // Super I/O Configuration Registers + // 7.15 General Control (GCTRL) + // General Control (GCTRL) + // (see EC Interface Registers and 6.2 Plug and Play Configuration (PNPCFG)) in datasheet + // note: these are in two places saved + // in EC Interface Registers and in super io configuration registers + // Chip ID + u16 ECHIPID1; + u16 ECHIPID2; + // Chip Version + u16 ECHIPVER; + u16 ECDEBUG; + + // Lenovo Custom OEM extension + // Firmware of ITE can be extended by + // custom program using its own "variables" + // These are the offsets to these "variables" + u16 EXT_FAN_CUR_POINT; + u16 EXT_FAN_POINTS_SIZE; + u16 EXT_FAN1_BASE; + u16 EXT_FAN2_BASE; + u16 EXT_FAN_ACC_BASE; + u16 EXT_FAN_DEC_BASE; + u16 EXT_CPU_TEMP; + u16 EXT_CPU_TEMP_HYST; + u16 EXT_GPU_TEMP; + u16 EXT_GPU_TEMP_HYST; + u16 EXT_VRM_TEMP; + u16 EXT_VRM_TEMP_HYST; + u16 EXT_FAN1_RPM_LSB; + u16 EXT_FAN1_RPM_MSB; + u16 EXT_FAN2_RPM_LSB; + u16 EXT_FAN2_RPM_MSB; + u16 EXT_FAN1_TARGET_RPM; + u16 EXT_FAN2_TARGET_RPM; + u16 EXT_POWERMODE; + u16 EXT_MINIFANCURVE_ON_COOL; + // values + // 0x04: enable mini fan curve if left for too long on cool level + // - this might be due to potential temp failure + // - or just because of really cool temps + // 0xA0: disable it + u16 EXT_LOCKFANCONTROLLER; + u16 EXT_MAXIMUMFANSPEED; + u16 EXT_WHITE_KEYBOARD_BACKLIGHT; + u16 EXT_IC_TEMP_INPUT; + u16 EXT_CPU_TEMP_INPUT; + u16 EXT_GPU_TEMP_INPUT; +}; + +enum access_method { + ACCESS_METHOD_NO_ACCESS = 0, + ACCESS_METHOD_EC = 1, + ACCESS_METHOD_ACPI = 2, + ACCESS_METHOD_WMI = 3, + ACCESS_METHOD_WMI2 = 4, + ACCESS_METHOD_WMI3 = 5, + ACCESS_METHOD_EC2 = 10, // ideapad fancurve method + ACCESS_METHOD_EC3 = 11, // loq +}; + +struct model_config { + const struct ec_register_offsets *registers; + bool check_embedded_controller_id; + u16 embedded_controller_id; + + // first addr in EC we access/scan + phys_addr_t memoryio_physical_ec_start; + size_t memoryio_size; + + // TODO: maybe use bitfield + bool has_minifancurve; + bool has_custom_powermode; + enum access_method access_method_powermode; + + enum access_method access_method_keyboard; + enum access_method access_method_temperature; + enum access_method access_method_fanspeed; + enum access_method access_method_fancurve; + enum access_method access_method_fanfullspeed; + bool three_state_keyboard; + + bool acpi_check_dev; + + phys_addr_t ramio_physical_start; + size_t ramio_size; +}; + +/* =================================== */ +/* Configuration for different models */ +/* =================================== */ + +// Idea by SmokelesssCPU (modified) +// - all default names and register addresses are supported by datasheet +// - register addresses for custom firmware by SmokelesssCPU +static const struct ec_register_offsets ec_register_offsets_v0 = { + .ECHIPID1 = 0x2000, + .ECHIPID2 = 0x2001, + .ECHIPVER = 0x2002, + .ECDEBUG = 0x2003, + .EXT_FAN_CUR_POINT = 0xC534, + .EXT_FAN_POINTS_SIZE = 0xC535, + .EXT_FAN1_BASE = 0xC540, + .EXT_FAN2_BASE = 0xC550, + .EXT_FAN_ACC_BASE = 0xC560, + .EXT_FAN_DEC_BASE = 0xC570, + .EXT_CPU_TEMP = 0xC580, + 
.EXT_CPU_TEMP_HYST = 0xC590, + .EXT_GPU_TEMP = 0xC5A0, + .EXT_GPU_TEMP_HYST = 0xC5B0, + .EXT_VRM_TEMP = 0xC5C0, + .EXT_VRM_TEMP_HYST = 0xC5D0, + .EXT_FAN1_RPM_LSB = 0xC5E0, + .EXT_FAN1_RPM_MSB = 0xC5E1, + .EXT_FAN2_RPM_LSB = 0xC5E2, + .EXT_FAN2_RPM_MSB = 0xC5E3, + .EXT_MINIFANCURVE_ON_COOL = 0xC536, + .EXT_LOCKFANCONTROLLER = 0xc4AB, + .EXT_CPU_TEMP_INPUT = 0xc538, + .EXT_GPU_TEMP_INPUT = 0xc539, + .EXT_IC_TEMP_INPUT = 0xC5E8, + .EXT_POWERMODE = 0xc420, + .EXT_FAN1_TARGET_RPM = 0xc600, + .EXT_FAN2_TARGET_RPM = 0xc601, + .EXT_MAXIMUMFANSPEED = 0xBD, + .EXT_WHITE_KEYBOARD_BACKLIGHT = (0x3B + 0xC400) +}; + +static const struct ec_register_offsets ec_register_offsets_v1 = { + .ECHIPID1 = 0x2000, + .ECHIPID2 = 0x2001, + .ECHIPVER = 0x2002, + .ECDEBUG = 0x2003, + .EXT_FAN_CUR_POINT = 0xC534, + .EXT_FAN_POINTS_SIZE = 0xC535, + .EXT_FAN1_BASE = 0xC540, + .EXT_FAN2_BASE = 0xC550, + .EXT_FAN_ACC_BASE = 0xC560, + .EXT_FAN_DEC_BASE = 0xC570, + .EXT_CPU_TEMP = 0xC580, + .EXT_CPU_TEMP_HYST = 0xC590, + .EXT_GPU_TEMP = 0xC5A0, + .EXT_GPU_TEMP_HYST = 0xC5B0, + .EXT_VRM_TEMP = 0xC5C0, + .EXT_VRM_TEMP_HYST = 0xC5D0, + .EXT_FAN1_RPM_LSB = 0xC5E0, + .EXT_FAN1_RPM_MSB = 0xC5E1, + .EXT_FAN2_RPM_LSB = 0xC5E2, + .EXT_FAN2_RPM_MSB = 0xC5E3, + .EXT_MINIFANCURVE_ON_COOL = 0xC536, + .EXT_LOCKFANCONTROLLER = 0xc4AB, + .EXT_CPU_TEMP_INPUT = 0xc538, + .EXT_GPU_TEMP_INPUT = 0xc539, + .EXT_IC_TEMP_INPUT = 0xC5E8, + .EXT_POWERMODE = 0xc41D, + .EXT_FAN1_TARGET_RPM = 0xc600, + .EXT_FAN2_TARGET_RPM = 0xc601, + .EXT_MAXIMUMFANSPEED = 0xBD, + .EXT_WHITE_KEYBOARD_BACKLIGHT = (0x3B + 0xC400) +}; + +static const struct ec_register_offsets ec_register_offsets_ideapad_v0 = { + .ECHIPID1 = 0x2000, + .ECHIPID2 = 0x2001, + .ECHIPVER = 0x2002, + .ECDEBUG = 0x2003, + .EXT_FAN_CUR_POINT = 0xC5a0, // not found yet + .EXT_FAN_POINTS_SIZE = 0xC5a0, // constant 0 + .EXT_FAN1_BASE = 0xC5a0, + .EXT_FAN2_BASE = 0xC5a8, + .EXT_FAN_ACC_BASE = 0xC5a0, // not found yet + .EXT_FAN_DEC_BASE = 0xC5a0, // not found yet + .EXT_CPU_TEMP = 0xC550, // and repeated after 8 bytes + .EXT_CPU_TEMP_HYST = 0xC590, // and repeated after 8 bytes + .EXT_GPU_TEMP = 0xC5C0, // and repeated after 8 bytes + .EXT_GPU_TEMP_HYST = 0xC5D0, // and repeated after 8 bytes + .EXT_VRM_TEMP = 0xC5a0, // does not exists or not found + .EXT_VRM_TEMP_HYST = 0xC5a0, // does not exists ot not found yet + .EXT_FAN1_RPM_LSB = 0xC5a0, // not found yet + .EXT_FAN1_RPM_MSB = 0xC5a0, // not found yet + .EXT_FAN2_RPM_LSB = 0xC5a0, // not found yet + .EXT_FAN2_RPM_MSB = 0xC5a0, // not found yet + .EXT_MINIFANCURVE_ON_COOL = 0xC5a0, // does not exists or not found + .EXT_LOCKFANCONTROLLER = 0xC5a0, // does not exists or not found + .EXT_CPU_TEMP_INPUT = 0xC5a0, // not found yet + .EXT_GPU_TEMP_INPUT = 0xC5a0, // not found yet + .EXT_IC_TEMP_INPUT = 0xC5a0, // not found yet + .EXT_POWERMODE = 0xC5a0, // not found yet + .EXT_FAN1_TARGET_RPM = 0xC5a0, // not found yet + .EXT_FAN2_TARGET_RPM = 0xC5a0, // not found yet + .EXT_MAXIMUMFANSPEED = 0xC5a0, // not found yet + .EXT_WHITE_KEYBOARD_BACKLIGHT = 0xC5a0 // not found yet +}; + +static const struct ec_register_offsets ec_register_offsets_ideapad_v1 = { + .ECHIPID1 = 0x2000, + .ECHIPID2 = 0x2001, + .ECHIPVER = 0x2002, + .ECDEBUG = 0x2003, + .EXT_FAN_CUR_POINT = 0xC5a0, // not found yet + .EXT_FAN_POINTS_SIZE = 0xC5a0, // constant 0 + .EXT_FAN1_BASE = 0xC5a0, + .EXT_FAN2_BASE = 0xC5a8, + .EXT_FAN_ACC_BASE = 0xC5a0, // not found yet + .EXT_FAN_DEC_BASE = 0xC5a0, // not found yet + .EXT_CPU_TEMP = 0xC550, // and repeated after 8 bytes + 
.EXT_CPU_TEMP_HYST = 0xC590, // and repeated after 8 bytes + .EXT_GPU_TEMP = 0xC5C0, // and repeated after 8 bytes + .EXT_GPU_TEMP_HYST = 0xC5D0, // and repeated after 8 bytes + .EXT_VRM_TEMP = 0xC5a0, // does not exists or not found + .EXT_VRM_TEMP_HYST = 0xC5a0, // does not exists ot not found yet + .EXT_FAN1_RPM_LSB = 0xC5a0, // not found yet + .EXT_FAN1_RPM_MSB = 0xC5a0, // not found yet + .EXT_FAN2_RPM_LSB = 0xC5a0, // not found yet + .EXT_FAN2_RPM_MSB = 0xC5a0, // not found yet + .EXT_MINIFANCURVE_ON_COOL = 0xC5a0, // does not exists or not found + .EXT_LOCKFANCONTROLLER = 0xC5a0, // does not exists or not found + .EXT_CPU_TEMP_INPUT = 0xC5a0, // not found yet + .EXT_GPU_TEMP_INPUT = 0xC5a0, // not found yet + .EXT_IC_TEMP_INPUT = 0xC5a0, // not found yet + .EXT_POWERMODE = 0xC5a0, // not found yet + .EXT_FAN1_TARGET_RPM = 0xC5a0, // not found yet + .EXT_FAN2_TARGET_RPM = 0xC5a0, // not found yet + .EXT_MAXIMUMFANSPEED = 0xC5a0, // not found yet + .EXT_WHITE_KEYBOARD_BACKLIGHT = 0xC5a0 // not found yet +}; + +static const struct ec_register_offsets ec_register_offsets_loq_v0 = { + .ECHIPID1 = 0x2000, + .ECHIPID2 = 0x2001, + .ECHIPVER = 0x2002, + .ECDEBUG = 0x2003, + .EXT_FAN_CUR_POINT = 0xC5a0, + .EXT_FAN_POINTS_SIZE = 0xC5a0, // constant 0 + .EXT_FAN1_BASE = 0xC530, + .EXT_FAN2_BASE = 0xC530, // same rpm as cpu + .EXT_FAN_ACC_BASE = 0xC5a0, // not found yet + .EXT_FAN_DEC_BASE = 0xC5a0, // not found yet + .EXT_CPU_TEMP = 0xC52F, + .EXT_CPU_TEMP_HYST = 0xC5a0, // not found yet + .EXT_GPU_TEMP = 0xC531, + .EXT_GPU_TEMP_HYST = 0xC5a0, // not found yet + .EXT_VRM_TEMP = 0xC5a0, // not found yet + .EXT_VRM_TEMP_HYST = 0xC5a0, // not found yet + .EXT_FAN1_RPM_LSB = 0xC5a0, // not found yet + .EXT_FAN1_RPM_MSB = 0xC5a0, // not found yet + .EXT_FAN2_RPM_LSB = 0xC5a0, // not found yet + .EXT_FAN2_RPM_MSB = 0xC5a0, // not found yet + .EXT_MINIFANCURVE_ON_COOL = 0xC5a0, // not found yet + .EXT_LOCKFANCONTROLLER = 0xC5a0, // not found yet + .EXT_CPU_TEMP_INPUT = 0xC5a0, // not found yet + .EXT_GPU_TEMP_INPUT = 0xC5a0, // not found yet + .EXT_IC_TEMP_INPUT = 0xC5a0, // not found yet + .EXT_POWERMODE = 0xc41D, + .EXT_FAN1_TARGET_RPM = 0xC5a0, // not found yet + .EXT_FAN2_TARGET_RPM = 0xC5a0, // not found yet + .EXT_MAXIMUMFANSPEED = 0xC5a0, // not found yet + .EXT_WHITE_KEYBOARD_BACKLIGHT = 0xC5a0 // not found yet +}; + +static const struct model_config model_v0 = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x8227, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_EC, + .access_method_temperature = ACCESS_METHOD_EC, + .access_method_fancurve = ACCESS_METHOD_EC, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = true, + .ramio_physical_start = 0xFE00D400, + .ramio_size = 0x600 +}; + +static const struct model_config model_j2cn = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x8227, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_EC, + .access_method_temperature = ACCESS_METHOD_EC, + .access_method_fancurve = ACCESS_METHOD_EC, + 
.access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = true, + .ramio_physical_start = 0xFE00D400, + .ramio_size = 0x600 +}; + +static const struct model_config model_9vcn = { + .registers = &ec_register_offsets_ideapad_v1, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x8226, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_WMI, + .access_method_temperature = ACCESS_METHOD_WMI, + .access_method_fancurve = ACCESS_METHOD_EC2, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = false, + .ramio_physical_start = 0xFE00D400, + .ramio_size = 0x600 +}; + +static const struct model_config model_v2022 = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x8227, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_EC, + .access_method_temperature = ACCESS_METHOD_EC, + .access_method_fancurve = ACCESS_METHOD_EC, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = true, + .ramio_physical_start = 0xFE00D400, + .ramio_size = 0x600 +}; + +static const struct model_config model_4gcn = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x8226, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_EC, + .access_method_temperature = ACCESS_METHOD_EC, + .access_method_fancurve = ACCESS_METHOD_EC, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = true, + .ramio_physical_start = 0xFE00D400, + .ramio_size = 0x600 +}; + +static const struct model_config model_bvcn = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = false, + .embedded_controller_id = 0x8226, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_WMI, + .access_method_temperature = ACCESS_METHOD_WMI, + .access_method_fancurve = ACCESS_METHOD_NO_ACCESS, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = false, + .ramio_physical_start = 0xFC7E0800, + .ramio_size = 0x600 +}; + +static const struct model_config model_bhcn = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x8226, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = false, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_ACPI, + .access_method_fanspeed = ACCESS_METHOD_WMI, + .access_method_temperature = ACCESS_METHOD_ACPI, + .access_method_fancurve = ACCESS_METHOD_EC, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = true, + .ramio_physical_start = 0xFF00D400, + .ramio_size = 0x600 +}; + +static const struct model_config 
model_kwcn = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x5507, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = false, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_WMI3, + .access_method_temperature = ACCESS_METHOD_WMI3, + .access_method_fancurve = ACCESS_METHOD_WMI3, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = true, + .ramio_physical_start = 0xFE0B0400, + .ramio_size = 0x600 +}; + +static const struct model_config model_m0cn = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x5507, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_WMI3, + .access_method_temperature = ACCESS_METHOD_WMI3, + .access_method_fancurve = ACCESS_METHOD_WMI3, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = false, + .ramio_physical_start = 0xFE0B0400, + .ramio_size = 0x600 +}; + +static const struct model_config model_m1cn = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x5507, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_WMI3, + .access_method_temperature = ACCESS_METHOD_WMI3, + .access_method_fancurve = ACCESS_METHOD_WMI3, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = false, + .ramio_physical_start = 0xFE0B0400, + .ramio_size = 0x600 +}; + +static const struct model_config model_m2cn = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x8227, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_WMI3, + .access_method_temperature = ACCESS_METHOD_WMI3, + .access_method_fancurve = ACCESS_METHOD_WMI3, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = false, + .ramio_physical_start = 0xFE0B0400, + .ramio_size = 0x600 +}; + +static const struct model_config model_m6cn = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x8227, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_WMI3, + .access_method_temperature = ACCESS_METHOD_WMI3, + .access_method_fancurve = ACCESS_METHOD_WMI3, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = false, + .ramio_physical_start = 0xFE0B0400, + .ramio_size = 0x600 +}; + +static const struct model_config model_k1cn = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x5263, + .memoryio_physical_ec_start = 
0xC400, + .memoryio_size = 0x300, + .has_minifancurve = false, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_WMI3, + .access_method_temperature = ACCESS_METHOD_WMI3, + .access_method_fancurve = ACCESS_METHOD_WMI3, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = true, + .ramio_physical_start = 0xFE0B0400, + .ramio_size = 0x600 +}; + +static const struct model_config model_lpcn = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x5507, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = false, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_WMI3, + .access_method_temperature = ACCESS_METHOD_WMI3, + .access_method_fancurve = ACCESS_METHOD_WMI3, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = true, + .ramio_physical_start = 0xFE0B0400, + .ramio_size = 0x600 +}; + +static const struct model_config model_kfcn = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x8227, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = false, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_EC, + .access_method_temperature = ACCESS_METHOD_EC, + .access_method_fancurve = ACCESS_METHOD_EC, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = true, + .ramio_physical_start = 0xFE00D400, + .ramio_size = 0x600 +}; + +static const struct model_config model_hacn = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = false, + .embedded_controller_id = 0x8227, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_EC, + .access_method_temperature = ACCESS_METHOD_EC, + .access_method_fancurve = ACCESS_METHOD_EC, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = true, + .ramio_physical_start = 0xFE00D400, + .ramio_size = 0x600 +}; + +static const struct model_config model_k9cn = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = false, + .embedded_controller_id = 0x8227, + .memoryio_physical_ec_start = 0xC400, // or replace 0xC400 by 0x0400 ? 
+ .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_EC, + .access_method_temperature = ACCESS_METHOD_EC, + .access_method_fancurve = ACCESS_METHOD_EC, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = true, + .ramio_physical_start = 0xFE00D400, + .ramio_size = 0x600 +}; + +static const struct model_config model_eucn = { + .registers = &ec_register_offsets_v1, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x8227, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_EC, + .access_method_temperature = ACCESS_METHOD_EC, + .access_method_fancurve = ACCESS_METHOD_EC, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = true, + .ramio_physical_start = 0xFE00D400, + .ramio_size = 0x600 +}; + +static const struct model_config model_fccn = { + .registers = &ec_register_offsets_ideapad_v0, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x8227, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = false, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_WMI, + .access_method_temperature = ACCESS_METHOD_ACPI, + .access_method_fancurve = ACCESS_METHOD_EC2, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = true, + .ramio_physical_start = 0xFE00D400, + .ramio_size = 0x600 +}; + +static const struct model_config model_h3cn = { + //0xFE0B0800 + .registers = &ec_register_offsets_v1, + .check_embedded_controller_id = false, + .embedded_controller_id = 0x8227, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = false, + .has_custom_powermode = false, + .access_method_powermode = ACCESS_METHOD_WMI, + // not implemented (properly) in WMI, RGB conrolled by USB + .access_method_keyboard = ACCESS_METHOD_NO_ACCESS, + // accessing fan speed is not implemented in ACPI + // a variable in the operation region (or not found) + // and not per WMI (methods returns constant 0) + .access_method_fanspeed = ACCESS_METHOD_NO_ACCESS, + .access_method_temperature = ACCESS_METHOD_WMI, + .access_method_fancurve = ACCESS_METHOD_NO_ACCESS, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = false, + .ramio_physical_start = 0xFE0B0800, + .ramio_size = 0x600 +}; + +static const struct model_config model_e9cn = { + //0xFE0B0800 + .registers = &ec_register_offsets_v1, + .check_embedded_controller_id = false, + .embedded_controller_id = 0x8227, + .memoryio_physical_ec_start = 0xC400, //0xFC7E0800 + .memoryio_size = 0x300, + .has_minifancurve = false, + .has_custom_powermode = false, + .access_method_powermode = ACCESS_METHOD_WMI, + // not implemented (properly) in WMI, RGB conrolled by USB + .access_method_keyboard = ACCESS_METHOD_NO_ACCESS, + // accessing fan speed is not implemented in ACPI + // a variable in the operation region (or not found) + // and not per WMI (methods returns constant 0) + .access_method_fanspeed = ACCESS_METHOD_WMI, + .access_method_temperature = ACCESS_METHOD_WMI, + .access_method_fancurve = ACCESS_METHOD_NO_ACCESS, + 
.access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = false, + .ramio_physical_start = 0xFC7E0800, + .ramio_size = 0x600 +}; + +static const struct model_config model_8jcn = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x8226, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_WMI, + .access_method_temperature = ACCESS_METHOD_WMI, + .access_method_fancurve = ACCESS_METHOD_EC, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = false, + .ramio_physical_start = 0xFE00D400, + .ramio_size = 0x600 +}; + +static const struct model_config model_jncn = { + .registers = &ec_register_offsets_v1, + .check_embedded_controller_id = false, + .embedded_controller_id = 0x8227, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = false, + .has_custom_powermode = false, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_NO_ACCESS, + .access_method_fanspeed = ACCESS_METHOD_WMI, + .access_method_temperature = ACCESS_METHOD_WMI, + .access_method_fancurve = ACCESS_METHOD_NO_ACCESS, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = false, + .ramio_physical_start = 0xFC7E0800, + .ramio_size = 0x600 +}; + +// Yoga Model! +static const struct model_config model_j1cn = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x8227, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_EC, + .access_method_temperature = ACCESS_METHOD_EC, + .access_method_fancurve = ACCESS_METHOD_EC, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = true, + .ramio_physical_start = 0xFE0B0400, + .ramio_size = 0x600 +}; + +// Yoga Model! +static const struct model_config model_dmcn = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x8227, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_EC, + .access_method_temperature = ACCESS_METHOD_EC, + .access_method_fancurve = ACCESS_METHOD_EC, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = true, + .ramio_physical_start = 0xFE700D00, + .ramio_size = 0x600 +}; + +// Yoga Model! 
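The model_config tables differ mostly in which access_method each feature uses; the remaining Yoga and LOQ entries continue right below. As a hedged sketch of how such a table entry steers a code path, with hypothetical helper names that are not part of the driver:

/* Hypothetical backends, declared here only for illustration. */
static int example_fanspeed_from_ec(int *rpm);
static int example_fanspeed_from_wmi(int *rpm);

/* Sketch: pick the fan-speed backend that the detected model declares. */
static int example_read_fanspeed(const struct model_config *model, int *rpm)
{
	switch (model->access_method_fanspeed) {
	case ACCESS_METHOD_EC:
		return example_fanspeed_from_ec(rpm);
	case ACCESS_METHOD_WMI:
	case ACCESS_METHOD_WMI3:
		return example_fanspeed_from_wmi(rpm);
	default:
		return -EOPNOTSUPP;
	}
}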
+static const struct model_config model_khcn = { + .registers = &ec_register_offsets_v0, + .check_embedded_controller_id = false, + .embedded_controller_id = 0x8227, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_EC, + .access_method_keyboard = ACCESS_METHOD_WMI, + .access_method_fanspeed = ACCESS_METHOD_EC, + .access_method_temperature = ACCESS_METHOD_EC, + .access_method_fancurve = ACCESS_METHOD_EC, + .access_method_fanfullspeed = ACCESS_METHOD_WMI, + .acpi_check_dev = false, + .ramio_physical_start = 0xFE0B0400, + .ramio_size = 0x600 +}; + +// LOQ Model +static const struct model_config model_lzcn = { + .registers = &ec_register_offsets_loq_v0, + .check_embedded_controller_id = true, + .embedded_controller_id = 0x8227, + .memoryio_physical_ec_start = 0xC400, + .memoryio_size = 0x300, + .has_minifancurve = true, + .has_custom_powermode = true, + .access_method_powermode = ACCESS_METHOD_WMI, + .access_method_keyboard = ACCESS_METHOD_WMI2, + .access_method_fanspeed = ACCESS_METHOD_WMI3, + .access_method_temperature = ACCESS_METHOD_WMI3, + .access_method_fancurve = ACCESS_METHOD_EC3, + .access_method_fanfullspeed = ACCESS_METHOD_WMI3, + .acpi_check_dev = false, + .ramio_physical_start = 0xFE0B0400, + .ramio_size = 0x600 +}; + +static const struct dmi_system_id denylist[] = { {} }; + +static const struct dmi_system_id optimistic_allowlist[] = { + { + // Release year: 2021 + // Generation: 6 + // Name: Legion 5, Legion 5 pro, Legion 7 + // Family: Legion 5 15ACH6H, ... + .ident = "GKCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "GKCN"), + }, + .driver_data = (void *)&model_v0 + }, + { + // Release year: 2020 + .ident = "EUCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "EUCN"), + }, + .driver_data = (void *)&model_eucn + }, + { + // Release year: 2020 + .ident = "EFCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "EFCN"), + }, + .driver_data = (void *)&model_v0 + }, + { + // Release year: 2020 + .ident = "FSCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "FSCN"), + }, + .driver_data = (void *)&model_v0 + }, + { + // Release year: 2021 + .ident = "HHCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "HHCN"), + }, + .driver_data = (void *)&model_v0 + }, + { + // Release year: 2022 + .ident = "H1CN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "H1CN"), + }, + .driver_data = (void *)&model_v0 + }, + { + // Release year: 2022 + .ident = "J2CN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "J2CN"), + }, + .driver_data = (void *)&model_v0 + }, + { + // Release year: 2022 + .ident = "JUCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "JUCN"), + }, + .driver_data = (void *)&model_v0 + }, + { + // Release year: 2022 + .ident = "KFCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "KFCN"), + }, + .driver_data = (void *)&model_kfcn + }, + { + // Release year: 2021 + .ident = "HACN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "HACN"), + }, + .driver_data = (void *)&model_hacn + }, + { + // Release year: 2021 + .ident = "G9CN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + 
DMI_MATCH(DMI_BIOS_VERSION, "G9CN"), + }, + .driver_data = (void *)&model_v0 + }, + { + // Release year: 2022 + .ident = "K9CN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "K9CN"), + }, + .driver_data = (void *)&model_k9cn + }, + { + // e.g. IdeaPad Gaming 3 15ARH05 + .ident = "FCCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "FCCN"), + }, + .driver_data = (void *)&model_fccn + }, + { + // e.g. IdeaPad Gaming 3 15ARH05 (8K21) + .ident = "H4CN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "H4CN"), + }, + .driver_data = (void *)&model_fccn + }, + { + // e.g. Ideapad Gaming 3 15ACH6 + .ident = "H3CN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "H3CN"), + }, + .driver_data = (void *)&model_h3cn + }, + { + // e.g. IdeaPad Gaming 3 15ARH7 (2022) + .ident = "JNCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "JNCN"), + }, + .driver_data = (void *)&model_jncn + }, + { + // 2020, seems very different in ACPI dissassembly + .ident = "E9CN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "E9CN"), + }, + .driver_data = (void *)&model_e9cn + }, + { + // e.g. Legion Y7000 (older version) + .ident = "8JCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "8JCN"), + }, + .driver_data = (void *)&model_8jcn + }, + { + // e.g. Legion 7i Pro 2023 + .ident = "KWCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "KWCN"), + }, + .driver_data = (void *)&model_kwcn + }, + { + // e.g. Legion Pro 5 2023 or R9000P + .ident = "LPCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "LPCN"), + }, + .driver_data = (void *)&model_lpcn + }, + { + // e.g. Lenovo Legion 5i/Y7000 2019 PG0 + .ident = "BHCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "BHCN"), + }, + .driver_data = (void *)&model_bhcn + }, + { + // e.g. Lenovo 7 16IAX7 + .ident = "K1CN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "K1CN"), + }, + .driver_data = (void *)&model_k1cn + }, + { + // e.g. Legion Y720 + .ident = "4GCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "4GCN"), + }, + .driver_data = (void *)&model_4gcn + }, + { + // e.g. Legion Slim 5 16APH8 2023 + .ident = "M3CN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "M3CN"), + }, + .driver_data = (void *)&model_lpcn + }, + { + // e.g. Legion Y7000p-1060 + .ident = "9VCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "9VCN"), + }, + .driver_data = (void *)&model_9vcn + }, + { + // e.g. Legion Y9000X + .ident = "JYCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "JYCN"), + }, + .driver_data = (void *)&model_v2022 + }, + { + // e.g. Legion Y740-15IRH, older model e.g. with GTX 1660 + .ident = "BVCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "BVCN"), + }, + .driver_data = (void *)&model_bvcn + }, + { + // e.g. Legion 5 Pro 16IAH7H with a RTX 3070 Ti + .ident = "J2CN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "J2CN"), + }, + .driver_data = (void *)&model_j2cn + }, + { + // e.g. 
Lenovo Yoga 7 16IAH7 with GPU Intel DG2 Arc A370M + .ident = "J1CN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "J1CN"), + }, + .driver_data = (void *)&model_j1cn + }, + { + // e.g. Legion Slim 7 16IRH8 (2023) with RTX 4070 + .ident = "M0CN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "M0CN"), + }, + .driver_data = (void *)&model_m0cn + }, + { + // e.g. Legion Slim 7 16IRH8 (2023) AMD Ryzen 7 7840HS with RTX 4060 + .ident = "M1CN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "M1CN"), + }, + .driver_data = (void *)&model_m1cn + }, + { + // e.g. Legion Slim 5 16IRH8 (2023) with RTX 4070 + .ident = "M2CN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "M2CN"), + }, + .driver_data = (void *)&model_m2cn + }, + { + // e.g. Lenovo Yoga Slim 7 gen 8 (2023) + .ident = "M6CN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "M6CN"), + }, + .driver_data = (void *)&model_m6cn + }, + { + // e.g. Yoga Slim 7-14ARE05 + .ident = "DMCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "DMCN"), + }, + .driver_data = (void *)&model_dmcn + }, + { + // e.g. Yoga Slim 7 Pro 14ARH7 + .ident = "KHCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "KHCN"), + }, + .driver_data = (void *)&model_khcn + }, + { + // e.g. LOQ 15IRH8 + .ident = "LZCN", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BIOS_VERSION, "LZCN"), + }, + .driver_data = (void *)&model_lzcn + }, + {} +}; + +/* ================================= */ +/* ACPI and WMI access */ +/* ================================= */ + +// function from ideapad-laptop.c +static int eval_int(acpi_handle handle, const char *name, unsigned long *res) +{ + unsigned long long result; + acpi_status status; + + status = acpi_evaluate_integer(handle, (char *)name, NULL, &result); + if (ACPI_FAILURE(status)) + return -EIO; + + *res = result; + + return 0; +} + +// function from ideapad-laptop.c +static int exec_simple_method(acpi_handle handle, const char *name, + unsigned long arg) +{ + acpi_status status = + acpi_execute_simple_method(handle, (char *)name, arg); + + return ACPI_FAILURE(status) ? 
-EIO : 0; +} + +// function from ideapad-laptop.c +static int exec_sbmc(acpi_handle handle, unsigned long arg) +{ + // \_SB.PCI0.LPC0.EC0.VPC0.SBMC + return exec_simple_method(handle, "VPC0.SBMC", arg); +} + +//static int eval_qcho(acpi_handle handle, unsigned long *res) +//{ +// // \_SB.PCI0.LPC0.EC0.QCHO +// return eval_int(handle, "QCHO", res); +//} + +static int eval_gbmd(acpi_handle handle, unsigned long *res) +{ + return eval_int(handle, "VPC0.GBMD", res); +} + +static int eval_spmo(acpi_handle handle, unsigned long *res) +{ + // \_SB.PCI0.LPC0.EC0.QCHO + return eval_int(handle, "VPC0.BTSM", res); +} + +static int acpi_process_buffer_to_ints(const char *id_name, int id_nr, + acpi_status status, + struct acpi_buffer *out_buffer, u8 *res, + size_t ressize) +{ + // seto to NULL call kfree on NULL if next function call fails + union acpi_object *out = NULL; + size_t i; + int error = 0; + + if (ACPI_FAILURE(status)) { + pr_info("ACPI evaluation error for: %s:%d\n", id_name, id_nr); + error = -EFAULT; + goto err; + } + + out = out_buffer->pointer; + if (!out) { + pr_info("Unexpected ACPI result for %s:%d\n", id_name, id_nr); + error = -AE_ERROR; + goto err; + } + + if (out->type != ACPI_TYPE_BUFFER || out->buffer.length != ressize) { + pr_info("Unexpected ACPI result for %s:%d: expected type %d but got %d; expected length %lu but got %u;\n", + id_name, id_nr, ACPI_TYPE_BUFFER, out->type, ressize, + out->buffer.length); + error = -AE_ERROR; + goto err; + } + +// Reduced verbosity (only printing when ACPI result have bad parameters) +// pr_info("ACPI result for %s:%d: ACPI buffer length: %u\n", id_name, +// id_nr, out->buffer.length); + + for (i = 0; i < ressize; ++i) + res[i] = out->buffer.pointer[i]; + error = 0; + +err: + kfree(out); + return error; +} + +//static int exec_ints(acpi_handle handle, const char *method_name, +// struct acpi_object_list *params, u8 *res, size_t ressize) +//{ +// acpi_status status; +// struct acpi_buffer out_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + +// status = acpi_evaluate_object(handle, (acpi_string)method_name, params, +// &out_buffer); + +// return acpi_process_buffer_to_ints(method_name, 0, status, &out_buffer, +// res, ressize); +//} + +static int wmi_exec_ints(const char *guid, u8 instance, u32 method_id, + const struct acpi_buffer *params, u8 *res, + size_t ressize) +{ + acpi_status status; + struct acpi_buffer out_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + + status = wmi_evaluate_method(guid, instance, method_id, params, + &out_buffer); + return acpi_process_buffer_to_ints(guid, method_id, status, &out_buffer, + res, ressize); +} + +static int wmi_exec_int(const char *guid, u8 instance, u32 method_id, + const struct acpi_buffer *params, unsigned long *res) +{ + acpi_status status; + struct acpi_buffer out_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + // set to NULL and call kfree on NULL if next function call fails + union acpi_object *out = NULL; + int error = 0; + + status = wmi_evaluate_method(guid, instance, method_id, params, + &out_buffer); + + if (ACPI_FAILURE(status)) { + pr_info("WMI evaluation error for: %s:%d\n", guid, method_id); + error = -EFAULT; + goto err; + } + + out = out_buffer.pointer; + if (!out) { + pr_info("Unexpected ACPI result for %s:%d", guid, method_id); + error = -AE_ERROR; + goto err; + } + + if (out->type != ACPI_TYPE_INTEGER) { + pr_info("Unexpected ACPI result for %s:%d: expected type %d but got %d\n", + guid, method_id, ACPI_TYPE_INTEGER, out->type); + error = -AE_ERROR; + goto err; + } + + *res = out->integer.value; 
+ error = 0; + +err: + kfree(out); + return error; +} + +static int wmi_exec_noarg_int(const char *guid, u8 instance, u32 method_id, + unsigned long *res) +{ + struct acpi_buffer params; + + params.length = 0; + params.pointer = NULL; + return wmi_exec_int(guid, instance, method_id, ¶ms, res); +} + +static int wmi_exec_noarg_ints(const char *guid, u8 instance, u32 method_id, + u8 *res, size_t ressize) +{ + struct acpi_buffer params; + + params.length = 0; + params.pointer = NULL; + return wmi_exec_ints(guid, instance, method_id, ¶ms, res, ressize); +} + +static int wmi_exec_arg(const char *guid, u8 instance, u32 method_id, void *arg, + size_t arg_size) +{ + struct acpi_buffer params; + acpi_status status; + + params.length = arg_size; + params.pointer = arg; + status = wmi_evaluate_method(guid, instance, method_id, ¶ms, NULL); + + if (ACPI_FAILURE(status)) + return -EIO; + return 0; +} + +/* ================================= */ +/* Lenovo WMI config */ +/* ================================= */ +#define LEGION_WMI_GAMEZONE_GUID "887B54E3-DDDC-4B2C-8B88-68A26A8835D0" +// GPU over clock +#define WMI_METHOD_ID_ISSUPPORTGPUOC 4 + +// Fan speed +// only fully implemented for some models here +// often implemented in other classes and methods too +// below +#define WMI_METHOD_ID_GETFAN1SPEED 8 +#define WMI_METHOD_ID_GETFAN2SPEED 9 + +// Version of ACPI +#define WMI_METHOD_ID_GETVERSION 11 +// Does it support CPU overclock? +#define WMI_METHOD_ID_ISSUPPORTCPUOC 14 +// Temperatures +// only fully implemented for some models here +// often implemented in other classes and methods too +// below +#define WMI_METHOD_ID_GETCPUTEMP 18 +#define WMI_METHOD_ID_GETGPUTEMP 19 + +// two state keyboard light +#define WMI_METHOD_ID_GETKEYBOARDLIGHT 37 +#define WMI_METHOD_ID_SETKEYBOARDLIGHT 36 +// toggle win key +// 0 = win key enabled; 1 = win key disabled +#define WMI_METHOD_ID_ISSUPPORTDISABLEWINKEY 21 +#define WMI_METHOD_ID_GETWINKEYSTATUS 23 +#define WMI_METHOD_ID_SETWINKEYSTATUS 22 +// toggle touchpad +//0 = touchpad enabled; 1 = touchpad disabled +#define WMI_METHOD_ID_ISSUPPORTDISABLETP 24 +#define WMI_METHOD_ID_GETTPSTATUS 26 +#define WMI_METHOD_ID_SETTPSTATUS 25 +// GSync +#define WMI_METHOD_ID_ISSUPPORTGSYNC 40 +#define WMI_METHOD_ID_GETGSYNCSTATUS 41 +#define WMI_METHOD_ID_SETGSYNCSTATUS 42 +//smartFanMode = powermode +#define WMI_METHOD_ID_ISSUPPORTSMARTFAN 49 +#define WMI_METHOD_ID_GETSMARTFANMODE 45 +#define WMI_METHOD_ID_SETSMARTFANMODE 44 +// power charge mode +#define WMI_METHOD_ID_GETPOWERCHARGEMODE 47 +// overdrive of display to reduce latency +// 0=off, 1=on +#define WMI_METHOD_ID_ISSUPPORTOD 49 +#define WMI_METHOD_ID_GETODSTATUS 50 +#define WMI_METHOD_ID_SETODSTATUS 51 +// thermal mode = power mode used for cooling +#define WMI_METHOD_ID_GETTHERMALMODE 55 +// get max frequency of core 0 +#define WMI_METHOD_ID_GETCPUMAXFREQUENCY 60 +// check if AC adapter has enough power to overclock +#define WMI_METHOD_ID_ISACFITFOROC 62 +// set iGPU (GPU packaged with CPU) state +#define WMI_METHOD_ID_ISSUPPORTIGPUMODE 63 +#define WMI_METHOD_ID_GETIGPUMODESTATUS 64 +#define WMI_METHOD_ID_SETIGPUMODESTATUS 65 +#define WMI_METHOD_ID_NOTIFYDGPUSTATUS 66 +enum IGPUState { + IGPUState_default = 0, + IGPUState_iGPUOnly = 1, + IGPUState_auto = 2 +}; + +#define WMI_GUID_LENOVO_CPU_METHOD "14afd777-106f-4c9b-b334-d388dc7809be" +#define WMI_METHOD_ID_CPU_GET_SUPPORT_OC_STATUS 15 +#define WMI_METHOD_ID_CPU_GET_OC_STATUS 1 +#define WMI_METHOD_ID_CPU_SET_OC_STATUS 2 + +// ppt limit slow +#define 
WMI_METHOD_ID_CPU_GET_SHORTTERM_POWERLIMIT 3 +#define WMI_METHOD_ID_CPU_SET_SHORTTERM_POWERLIMIT 4 +// ppt stapm +#define WMI_METHOD_ID_CPU_GET_LONGTERM_POWERLIMIT 5 +#define WMI_METHOD_ID_CPU_SET_LONGTERM_POWERLIMIT 6 +// default power limit +#define WMI_METHOD_ID_CPU_GET_DEFAULT_POWERLIMIT 7 +// peak power limit +#define WMI_METHOD_ID_CPU_GET_PEAK_POWERLIMIT 8 +#define WMI_METHOD_ID_CPU_SET_PEAK_POWERLIMIT 9 +// apu sppt powerlimit +#define WMI_METHOD_ID_CPU_GET_APU_SPPT_POWERLIMIT 12 +#define WMI_METHOD_ID_CPU_SET_APU_SPPT_POWERLIMIT 13 +// cross loading powerlimit +#define WMI_METHOD_ID_CPU_GET_CROSS_LOADING_POWERLIMIT 16 +#define WMI_METHOD_ID_CPU_SET_CROSS_LOADING_POWERLIMIT 17 + +#define WMI_GUID_LENOVO_GPU_METHOD "da7547f1-824d-405f-be79-d9903e29ced7" +// overclock GPU possible +#define WMI_METHOD_ID_GPU_GET_OC_STATUS 1 +#define WMI_METHOD_ID_GPU_SET_OC_STATUS 2 +// dynamic boost power +#define WMI_METHOD_ID_GPU_GET_PPAB_POWERLIMIT 3 +#define WMI_METHOD_ID_GPU_SET_PPAB_POWERLIMIT 4 +// configurable TGP (power) +#define WMI_METHOD_ID_GPU_GET_CTGP_POWERLIMIT 5 +#define WMI_METHOD_ID_GPU_SET_CTGP_POWERLIMIT 6 +// ppab/ctgp powerlimit +#define WMI_METHOD_ID_GPU_GET_DEFAULT_PPAB_CTGP_POWERLIMIT 7 +// temperature limit +#define WMI_METHOD_ID_GPU_GET_TEMPERATURE_LIMIT 8 +#define WMI_METHOD_ID_GPU_SET_TEMPERATURE_LIMIT 9 +// boost clock +#define WMI_METHOD_ID_GPU_GET_BOOST_CLOCK 10 + +#define WMI_GUID_LENOVO_FAN_METHOD "92549549-4bde-4f06-ac04-ce8bf898dbaa" +// set fan to maximal speed; dust cleaning mode +// only works in custom power mode +#define WMI_METHOD_ID_FAN_GET_FULLSPEED 1 +#define WMI_METHOD_ID_FAN_SET_FULLSPEED 2 +// max speed of fan +#define WMI_METHOD_ID_FAN_GET_MAXSPEED 3 +#define WMI_METHOD_ID_FAN_SET_MAXSPEED 4 +// fan table in custom mode +#define WMI_METHOD_ID_FAN_GET_TABLE 5 +#define WMI_METHOD_ID_FAN_SET_TABLE 6 +// get speed of fans +#define WMI_METHOD_ID_FAN_GETCURRENTFANSPEED 7 +// get temperatures of CPU and GPU used for controlling cooling +#define WMI_METHOD_ID_FAN_GETCURRENTSENSORTEMPERATURE 8 + +// do not implement following +// #define WMI_METHOD_ID_Fan_SetCurrentFanSpeed 9 + +#define LEGION_WMI_KBBACKLIGHT_GUID "8C5B9127-ECD4-4657-980F-851019F99CA5" +// access the keyboard backlight with 3 states +#define WMI_METHOD_ID_KBBACKLIGHTGET 0x1 +#define WMI_METHOD_ID_KBBACKLIGHTSET 0x2 + +// new method in newer methods to get or set most of the values +// with the two methods GetFeatureValue or SetFeatureValue. +// They are called like GetFeatureValue(feature_id) where +// feature_id is a id for the feature +#define LEGION_WMI_LENOVO_OTHER_METHOD_GUID \ + "dc2a8805-3a8c-41ba-a6f7-092e0089cd3b" +#define WMI_METHOD_ID_GET_FEATURE_VALUE 17 +#define WMI_METHOD_ID_SET_FEATURE_VALUE 18 + +enum OtherMethodFeature { + OtherMethodFeature_U1 = 0x010000, //->PC00.LPCB.EC0.REJF + OtherMethodFeature_U2 = 0x0F0000, //->C00.PEG1.PXP._STA? + OtherMethodFeature_U3 = 0x030000, //->PC00.LPCB.EC0.FLBT? 
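+	// Note: the feature IDs below appear to encode the subsystem in the
+	// high byte (0x01 = CPU, 0x02 = GPU, 0x04 = fan, 0x05 = sensors);
+	// this grouping is inferred from the values, not from documentation.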
+	OtherMethodFeature_CPU_SHORT_TERM_POWER_LIMIT = 0x01010000,
+	OtherMethodFeature_CPU_LONG_TERM_POWER_LIMIT = 0x01020000,
+	OtherMethodFeature_CPU_PEAK_POWER_LIMIT = 0x01030000,
+	OtherMethodFeature_CPU_TEMPERATURE_LIMIT = 0x01040000,
+
+	OtherMethodFeature_APU_PPT_POWER_LIMIT = 0x01050000,
+
+	OtherMethodFeature_CPU_CROSS_LOAD_POWER_LIMIT = 0x01060000,
+	OtherMethodFeature_CPU_L1_TAU = 0x01070000,
+
+	OtherMethodFeature_GPU_POWER_BOOST = 0x02010000,
+	OtherMethodFeature_GPU_cTGP = 0x02020000,
+	OtherMethodFeature_GPU_TEMPERATURE_LIMIT = 0x02030000,
+	OtherMethodFeature_GPU_POWER_TARGET_ON_AC_OFFSET_FROM_BASELINE =
+		0x02040000,
+
+	OtherMethodFeature_FAN_SPEED_1 = 0x04030001,
+	OtherMethodFeature_FAN_SPEED_2 = 0x04030002,
+
+	OtherMethodFeature_C_U1 = 0x05010000,
+	OtherMethodFeature_TEMP_CPU = 0x05040000,
+	OtherMethodFeature_TEMP_GPU = 0x05050000,
+};
+
+static ssize_t wmi_other_method_get_value(enum OtherMethodFeature feature_id,
+					  int *value)
+{
+	struct acpi_buffer params;
+	int error;
+	unsigned long res;
+	u32 param1 = feature_id;
+
+	params.length = sizeof(param1);
+	params.pointer = &param1;
+	error = wmi_exec_int(LEGION_WMI_LENOVO_OTHER_METHOD_GUID, 0,
+			     WMI_METHOD_ID_GET_FEATURE_VALUE, &params, &res);
+	if (!error)
+		*value = res;
+	return error;
+}
+
+/* =================================== */
+/* EC RAM Access with memory mapped IO */
+/* =================================== */
+
+struct ecram_memoryio {
+	// TODO: start of remapped memory in EC RAM is assumed to be 0
+	// u16 ecram_start;
+
+	// physical address of remapped IO, depends on model and firmware
+	phys_addr_t physical_start;
+	// start address of the region in EC memory
+	phys_addr_t physical_ec_start;
+	// virtual address of remapped IO
+	u8 *virtual_start;
+	// size of remapped access
+	size_t size;
+};
+
+/**
+ * physical_start: physical address that corresponds to EC RAM offset 0
+ * size: size of the remapped region
+ *
+ * strong exception safety
+ */
+static ssize_t ecram_memoryio_init(struct ecram_memoryio *ec_memoryio,
+				   phys_addr_t physical_start,
+				   phys_addr_t physical_ec_start, size_t size)
+{
+	void *virtual_start = ioremap(physical_start, size);
+
+	if (!IS_ERR_OR_NULL(virtual_start)) {
+		ec_memoryio->virtual_start = virtual_start;
+		ec_memoryio->physical_start = physical_start;
+		ec_memoryio->physical_ec_start = physical_ec_start;
+		ec_memoryio->size = size;
+		pr_info("Successfully mapped embedded controller: 0x%llx (in RAM)/0x%llx (in EC) to virtual 0x%p\n",
+			ec_memoryio->physical_start,
+			ec_memoryio->physical_ec_start,
+			ec_memoryio->virtual_start);
+	} else {
+		pr_info("Error mapping embedded controller memory at 0x%llx\n",
+			physical_start);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static void ecram_memoryio_exit(struct ecram_memoryio *ec_memoryio)
+{
+	if (ec_memoryio->virtual_start != NULL) {
+		pr_info("Unmapping embedded controller memory at 0x%llx (in RAM)/0x%llx (in EC) at virtual 0x%p\n",
+			ec_memoryio->physical_start,
+			ec_memoryio->physical_ec_start,
+			ec_memoryio->virtual_start);
+		iounmap(ec_memoryio->virtual_start);
+		ec_memoryio->virtual_start = NULL;
+	}
+}
+
+/* Read a byte from the EC RAM.
+ *
+ * Returns a status code because all methods that access EC RAM
+ * share a common signature.
+ */
+static ssize_t ecram_memoryio_read(const struct ecram_memoryio *ec_memoryio,
+				   u16 ec_offset, u8 *value)
+{
+	if (ec_offset < ec_memoryio->physical_ec_start) {
+		pr_info("Unexpected read at offset %d into EC RAM\n",
+			ec_offset);
+		return -1;
+	}
+	*value = *(ec_memoryio->virtual_start +
+		   (ec_offset - ec_memoryio->physical_ec_start));
+	return 0;
+}
+
+/* Write a byte to the EC RAM.
+ *
+ * Returns a status code because all methods that access EC RAM
+ * share a common signature.
+ */
+ssize_t ecram_memoryio_write(const struct ecram_memoryio *ec_memoryio,
+			     u16 ec_offset, u8 value)
+{
+	if (ec_offset < ec_memoryio->physical_ec_start) {
+		pr_info("Unexpected write at offset %d into EC RAM\n",
+			ec_offset);
+		return -1;
+	}
+	*(ec_memoryio->virtual_start +
+	  (ec_offset - ec_memoryio->physical_ec_start)) = value;
+	return 0;
+}
+
+/* ================================= */
+/* EC RAM Access with port-mapped IO */
+/* ================================= */
+
+/*
+ * See the datasheet of e.g. the IT8502E/F/G,
+ * section 6.2 Plug and Play Configuration (PNPCFG).
+ *
+ * Depending on the configured BARDSEL register, the ports
+ * ECRAM_PORTIO_ADDR_PORT and
+ * ECRAM_PORTIO_DATA_PORT
+ * are selected.
+ *
+ * By performing I/O on these ports one can read and write
+ * registers in the EC.
+ *
+ * "To access a register of PNPCFG, write target index to
+ *  address port and access this PNPCFG register via
+ *  data port" [datasheet, 6.2 Plug and Play Configuration]
+ */
+
+// I/O ports used to communicate with the embedded controller
+// Start of the used port range
+#define ECRAM_PORTIO_START_PORT 0x4E
+// Number of used ports
+#define ECRAM_PORTIO_PORTS_SIZE 2
+// Port used to specify the address in EC RAM to read/write
+// 0x4E/0x4F is the usual port pair for a Super I/O controller;
+// 0x2E/0x2F is also common (ITE controllers can be configured to use these)
+#define ECRAM_PORTIO_ADDR_PORT 0x4E
+// Port used to send/receive the value to write/read
+#define ECRAM_PORTIO_DATA_PORT 0x4F
+// Name used to request the ports
+#define ECRAM_PORTIO_NAME "legion"
+
+struct ecram_portio {
+	/* Protects read/write access to EC RAM, which is performed
+	 * as a fixed sequence of outb/inb commands on the I/O ports.
+	 * At most one such sequence may run at a time.
+	 */
+	struct mutex io_port_mutex;
+};
+
+static ssize_t ecram_portio_init(struct ecram_portio *ec_portio)
+{
+	if (!request_region(ECRAM_PORTIO_START_PORT, ECRAM_PORTIO_PORTS_SIZE,
+			    ECRAM_PORTIO_NAME)) {
+		pr_info("Cannot init ecram_portio: cannot request the %x ports starting at %x\n",
+			ECRAM_PORTIO_PORTS_SIZE, ECRAM_PORTIO_START_PORT);
+		return -ENODEV;
+	}
+	//pr_info("Reserved %x ports starting at %x\n", ECRAM_PORTIO_PORTS_SIZE, ECRAM_PORTIO_START_PORT);
+	mutex_init(&ec_portio->io_port_mutex);
+	return 0;
+}
+
+static void ecram_portio_exit(struct ecram_portio *ec_portio)
+{
+	release_region(ECRAM_PORTIO_START_PORT, ECRAM_PORTIO_PORTS_SIZE);
+}
+
+/* Read a byte from the EC RAM.
+ *
+ * Returns a status code because all methods that access EC RAM
+ * share a common signature.
+ */
+static ssize_t ecram_portio_read(struct ecram_portio *ec_portio, u16 offset,
+				 u8 *value)
+{
+	mutex_lock(&ec_portio->io_port_mutex);
+
+	// select EC-internal register 0x11 and write the high byte of the offset
+	outb(0x2E, ECRAM_PORTIO_ADDR_PORT);
+	outb(0x11, ECRAM_PORTIO_DATA_PORT);
+	outb(0x2F, ECRAM_PORTIO_ADDR_PORT);
+	// TODO: an explicit cast between types is sometimes done here
+	// and sometimes not
+	outb((u8)((offset >> 8) & 0xFF), ECRAM_PORTIO_DATA_PORT);
+
+	// select EC-internal register 0x10 and write the low byte of the offset
+	outb(0x2E, ECRAM_PORTIO_ADDR_PORT);
+	outb(0x10, ECRAM_PORTIO_DATA_PORT);
+	outb(0x2F, ECRAM_PORTIO_ADDR_PORT);
+	outb((u8)(offset & 0xFF), ECRAM_PORTIO_DATA_PORT);
+
+	// select EC-internal register 0x12 and read the data byte
+	outb(0x2E, ECRAM_PORTIO_ADDR_PORT);
+	outb(0x12, ECRAM_PORTIO_DATA_PORT);
+	outb(0x2F, ECRAM_PORTIO_ADDR_PORT);
+	*value = inb(ECRAM_PORTIO_DATA_PORT);
+
+	mutex_unlock(&ec_portio->io_port_mutex);
+	return 0;
+}
+
+/* Write a byte to the EC RAM.
+ *
+ * Returns a status code because all methods that access EC RAM
+ * share a common signature.
+ */
+static ssize_t ecram_portio_write(struct ecram_portio *ec_portio, u16 offset,
+				  u8 value)
+{
+	mutex_lock(&ec_portio->io_port_mutex);
+
+	// select EC-internal register 0x11 and write the high byte of the offset
+	outb(0x2E, ECRAM_PORTIO_ADDR_PORT);
+	outb(0x11, ECRAM_PORTIO_DATA_PORT);
+	outb(0x2F, ECRAM_PORTIO_ADDR_PORT);
+	// TODO: an explicit cast between types is sometimes done here
+	// and sometimes not
+	outb((u8)((offset >> 8) & 0xFF), ECRAM_PORTIO_DATA_PORT);
+
+	// select EC-internal register 0x10 and write the low byte of the offset
+	outb(0x2E, ECRAM_PORTIO_ADDR_PORT);
+	outb(0x10, ECRAM_PORTIO_DATA_PORT);
+	outb(0x2F, ECRAM_PORTIO_ADDR_PORT);
+	outb((u8)(offset & 0xFF), ECRAM_PORTIO_DATA_PORT);
+
+	// select EC-internal register 0x12 and write the data byte
+	outb(0x2E, ECRAM_PORTIO_ADDR_PORT);
+	outb(0x12, ECRAM_PORTIO_DATA_PORT);
+	outb(0x2F, ECRAM_PORTIO_ADDR_PORT);
+	outb(value, ECRAM_PORTIO_DATA_PORT);
+
+	mutex_unlock(&ec_portio->io_port_mutex);
+	// TODO: remove this
+	//pr_info("Writing %d to addr %x\n", value, offset);
+	return 0;
+}
+
+/* =================================== */
+/* EC RAM Access */
+/* =================================== */
+
+struct ecram {
+	struct ecram_portio portio;
+};
+
+static ssize_t ecram_init(struct ecram *ecram,
+			  phys_addr_t memoryio_ec_physical_start,
+			  size_t region_size)
+{
+	ssize_t err;
+
+	err = ecram_portio_init(&ecram->portio);
+	if (err) {
+		pr_info("Failed ecram_portio_init\n");
+		goto err_ecram_portio_init;
+	}
+
+	return 0;
+
+err_ecram_portio_init:
+	return err;
+}
+
+static void ecram_exit(struct ecram *ecram)
+{
+	pr_info("Unloading legion ecram\n");
+	ecram_portio_exit(&ecram->portio);
+	pr_info("Unloading legion ecram done\n");
+}
+
+/** Read from EC RAM.
+ * ecram_offset: address in EC RAM
+ */
+static u8 ecram_read(struct ecram *ecram, u16 ecram_offset)
+{
+	u8 value;
+	int err;
+
+	err = ecram_portio_read(&ecram->portio, ecram_offset, &value);
+	if (err)
+		pr_info("Error reading EC RAM at 0x%x.\n", ecram_offset);
+	return value;
+}
+
+static void ecram_write(struct ecram *ecram, u16 ecram_offset, u8 value)
+{
+	int err;
+
+	if (ec_readonly) {
+		pr_info("Skipping writing EC RAM to 0x%x: Read-Only.\n",
+			ecram_offset);
+		return;
+	}
+	err = ecram_portio_write(&ecram->portio, ecram_offset, value);
+	if (err)
+		pr_info("Error writing EC RAM at 0x%x.\n", ecram_offset);
+}
+
+/* =============================== */
+/* Reads from EC */
+/* =============================== */
+
+static u16 read_ec_id(struct ecram *ecram, const struct model_config *model)
+{
+	u8 id1 = ecram_read(ecram, model->registers->ECHIPID1);
+	u8 id2 = ecram_read(ecram, model->registers->ECHIPID2);
+
+	return (id1 << 8) + id2;
+}
+
+static u16 read_ec_version(struct ecram *ecram,
+			   const struct model_config *model)
+{
+	u8 vers = ecram_read(ecram, model->registers->ECHIPVER);
+	u8 debug = ecram_read(ecram, model->registers->ECDEBUG);
+
+	return (vers << 8) + debug;
+}
+
+/* ============================= */
+/* Data model for sensor values */
+/* ============================= */
+
+struct sensor_values {
+	u16 fan1_rpm; // current speed of fan 1 in rpm
+	u16 fan2_rpm; // current speed of fan 2 in rpm
+	u16 fan1_target_rpm; // target speed of fan 1 in rpm
+	u16 fan2_target_rpm; // target speed of fan 2 in rpm
+	u8 cpu_temp_celsius; // CPU temperature in Celsius
+	u8 gpu_temp_celsius; // GPU temperature in Celsius
+	u8 ic_temp_celsius; // IC temperature in Celsius
+};
+
+enum SENSOR_ATTR {
+	SENSOR_CPU_TEMP_ID = 1,
+	SENSOR_GPU_TEMP_ID = 2,
+	SENSOR_IC_TEMP_ID = 3,
+	SENSOR_FAN1_RPM_ID = 4,
+	SENSOR_FAN2_RPM_ID = 5,
+	SENSOR_FAN1_TARGET_RPM_ID = 6,
+	SENSOR_FAN2_TARGET_RPM_ID = 7
+};
+
+/* ============================= */
+/* Data model for fan curve */
+/* ============================= */
+
+struct fancurve_point {
+	// rpm of fan 1 divided by 100
+	u8 rpm1_raw;
+	// rpm of fan 2 divided by 100
+	u8 rpm2_raw;
+	// >=2, <=5 (lower is faster); must increase by level
+	u8 accel;
+	// >=2, <=5 (lower is faster); must increase by level
+	u8 decel;
+
+	// min must be lower than or equal to max
+	// last level max must be 127
+	// <=127 cpu max temp for this level; must increase by level
+	u8 cpu_max_temp_celsius;
+	// <=127 cpu min temp for this level; must increase by level
+	u8 cpu_min_temp_celsius;
+	// <=127 gpu max temp for this level; must increase by level
+	u8 gpu_max_temp_celsius;
+	// <=127 gpu min temp for this level; must increase by level
+	u8 gpu_min_temp_celsius;
+	// <=127 ic max temp for this level; must increase by level
+	u8 ic_max_temp_celsius;
+	// <=127 ic min temp for this level; must increase by level
+	u8 ic_min_temp_celsius;
+};
+
+enum FANCURVE_ATTR {
+	FANCURVE_ATTR_PWM1 = 1,
+	FANCURVE_ATTR_PWM2 = 2,
+	FANCURVE_ATTR_CPU_TEMP = 3,
+	FANCURVE_ATTR_CPU_HYST = 4,
+	FANCURVE_ATTR_GPU_TEMP = 5,
+	FANCURVE_ATTR_GPU_HYST = 6,
+	FANCURVE_ATTR_IC_TEMP = 7,
+	FANCURVE_ATTR_IC_HYST = 8,
+	FANCURVE_ATTR_ACCEL = 9,
+	FANCURVE_ATTR_DECEL = 10,
+	FANCURVE_SIZE = 11,
+	FANCURVE_MINIFANCURVE_ON_COOL = 12
+};
+
+// used for clearing table entries
+static const struct fancurve_point fancurve_point_zero = { 0, 0, 0, 0, 0,
+							   0, 0, 0, 0, 0 };
+
+struct fancurve {
+	struct fancurve_point points[MAXFANCURVESIZE];
+	// number of points used; must be <= MAXFANCURVESIZE
+	size_t size;
+	// index of the point the fans are currently running at
+	size_t current_point_i;
+};
+
+// validation functions
+
+static bool fancurve_is_valid_min_temp(int min_temp)
+{
+	return min_temp >= 0 && min_temp <= 127;
+}
+
+static bool fancurve_is_valid_max_temp(int max_temp)
+{
+	return max_temp >= 0 && max_temp <= 127;
+}
+
+// setters with validation
+// - make the hwmon implementation easier
+// - keep the fan curve valid, otherwise the EC will not control the fans properly
+
+static bool fancurve_set_rpm1(struct fancurve *fancurve, int point_id, int rpm)
+{
+	bool valid = point_id == 0 ? rpm == 0 : (rpm >= 0 && rpm <= 4500);
+
+	if (valid)
+		fancurve->points[point_id].rpm1_raw = rpm / 100;
+	return valid;
+}
+
+static bool fancurve_set_rpm2(struct fancurve *fancurve, int point_id, int rpm)
+{
+	bool valid = point_id == 0 ? rpm == 0 : (rpm >= 0 && rpm <= 4500);
+
+	if (valid)
+		fancurve->points[point_id].rpm2_raw = rpm / 100;
+	return valid;
+}
+
+// TODO: remove { ...
} from single line if body + +static bool fancurve_set_accel(struct fancurve *fancurve, int point_id, + int accel) +{ + bool valid = accel >= 2 && accel <= 5; + + if (valid) + fancurve->points[point_id].accel = accel; + return valid; +} + +static bool fancurve_set_decel(struct fancurve *fancurve, int point_id, + int decel) +{ + bool valid = decel >= 2 && decel <= 5; + + if (valid) + fancurve->points[point_id].decel = decel; + return valid; +} + +static bool fancurve_set_cpu_temp_max(struct fancurve *fancurve, int point_id, + int value) +{ + bool valid = fancurve_is_valid_max_temp(value); + + if (valid) + fancurve->points[point_id].cpu_max_temp_celsius = value; + + return valid; +} + +static bool fancurve_set_gpu_temp_max(struct fancurve *fancurve, int point_id, + int value) +{ + bool valid = fancurve_is_valid_max_temp(value); + + if (valid) + fancurve->points[point_id].gpu_max_temp_celsius = value; + return valid; +} + +static bool fancurve_set_ic_temp_max(struct fancurve *fancurve, int point_id, + int value) +{ + bool valid = fancurve_is_valid_max_temp(value); + + if (valid) + fancurve->points[point_id].ic_max_temp_celsius = value; + return valid; +} + +static bool fancurve_set_cpu_temp_min(struct fancurve *fancurve, int point_id, + int value) +{ + bool valid = fancurve_is_valid_max_temp(value); + + if (valid) + fancurve->points[point_id].cpu_min_temp_celsius = value; + return valid; +} + +static bool fancurve_set_gpu_temp_min(struct fancurve *fancurve, int point_id, + int value) +{ + bool valid = fancurve_is_valid_min_temp(value); + + if (valid) + fancurve->points[point_id].gpu_min_temp_celsius = value; + return valid; +} + +static bool fancurve_set_ic_temp_min(struct fancurve *fancurve, int point_id, + int value) +{ + bool valid = fancurve_is_valid_min_temp(value); + + if (valid) + fancurve->points[point_id].ic_min_temp_celsius = value; + return valid; +} + +static bool fancurve_set_size(struct fancurve *fancurve, int size, + bool init_values) +{ + bool valid = size >= 1 && size <= MAXFANCURVESIZE; + + if (!valid) + return false; + if (init_values && size < fancurve->size) { + // fancurve size is decreased, but last entry always needs 127 temperatures + // Note: size >=1 + fancurve->points[size - 1].cpu_max_temp_celsius = 127; + fancurve->points[size - 1].ic_max_temp_celsius = 127; + fancurve->points[size - 1].gpu_max_temp_celsius = 127; + } + if (init_values && size > fancurve->size) { + // fancurve increased, so new entries need valid values + int i; + int last = fancurve->size > 0 ? 
fancurve->size - 1 : 0; + + for (i = fancurve->size; i < size; ++i) + fancurve->points[i] = fancurve->points[last]; + } + return true; +} + +static ssize_t fancurve_print_seqfile(const struct fancurve *fancurve, + struct seq_file *s) +{ + int i; + + seq_printf( + s, + "rpm1|rpm2|acceleration|deceleration|cpu_min_temp|cpu_max_temp|gpu_min_temp|gpu_max_temp|ic_min_temp|ic_max_temp\n"); + for (i = 0; i < fancurve->size; ++i) { + const struct fancurve_point *point = &fancurve->points[i]; + + seq_printf( + s, "%d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\n", + point->rpm1_raw * 100, point->rpm2_raw * 100, + point->accel, point->decel, point->cpu_min_temp_celsius, + point->cpu_max_temp_celsius, + point->gpu_min_temp_celsius, + point->gpu_max_temp_celsius, point->ic_min_temp_celsius, + point->ic_max_temp_celsius); + } + return 0; +} + +struct light { + bool initialized; + struct led_classdev led; + unsigned int last_brightness; + u8 light_id; + unsigned int lower_limit; + unsigned int upper_limit; +}; + +/* ============================= */ +/* Global and shared data between */ +/* all calls to this module */ +/* ============================= */ +// Implemented like ideapad-laptop.c but currently still +// without dynamic memory allocation (instead global _priv) +struct legion_private { + struct platform_device *platform_device; + // TODO: remove or keep? init? + struct acpi_device *adev; + + // Method to access ECRAM + struct ecram ecram; + // Configuration with registers and ECRAM access method + const struct model_config *conf; + + // TODO: maybe refactor and keep only local to each function + // last known fan curve + struct fancurve fancurve; + // configured fan curve from user space + struct fancurve fancurve_configured; + + // update lock, when partial values of fancurve are changed + struct mutex fancurve_mutex; + + //interfaces + struct dentry *debugfs_dir; + struct device *hwmon_dev; + struct platform_profile_handler platform_profile_handler; + + struct light kbd_bl; + struct light ylogo_light; + struct light iport_light; + + // TODO: remove? 
+ bool loaded; + + // TODO: remove, only for reverse enginnering + struct ecram_memoryio ec_memoryio; +}; + +// shared between different drivers: WMI, platform and protected by mutex +static struct legion_private *legion_shared; +static struct legion_private _priv; +static DEFINE_MUTEX(legion_shared_mutex); + +static int legion_shared_init(struct legion_private *priv) +{ + int ret; + + mutex_lock(&legion_shared_mutex); + + if (!legion_shared) { + legion_shared = priv; + mutex_init(&legion_shared->fancurve_mutex); + ret = 0; + } else { + pr_warn("Found multiple platform devices\n"); + ret = -EINVAL; + } + + priv->loaded = true; + mutex_unlock(&legion_shared_mutex); + + return ret; +} + +static void legion_shared_exit(struct legion_private *priv) +{ + pr_info("Unloading legion shared\n"); + mutex_lock(&legion_shared_mutex); + + if (legion_shared == priv) + legion_shared = NULL; + + mutex_unlock(&legion_shared_mutex); + pr_info("Unloading legion shared done\n"); +} + +static int get_simple_wmi_attribute(struct legion_private *priv, + const char *guid, u8 instance, + u32 method_id, bool invert, + unsigned long scale, unsigned long *value) +{ + unsigned long state = 0; + int err; + + if (scale == 0) { + pr_info("Scale cannot be 0\n"); + return -EINVAL; + } + err = wmi_exec_noarg_int(guid, instance, method_id, &state); + if (err) + return -EINVAL; + + // TODO: remove later + pr_info("%swith raw value: %ld\n", __func__, state); + + state = state * scale; + + if (invert) + state = !state; + *value = state; + return 0; +} + +static int get_simple_wmi_attribute_bool(struct legion_private *priv, + const char *guid, u8 instance, + u32 method_id, bool invert, + unsigned long scale, bool *value) +{ + unsigned long int_val = *value; + int err = get_simple_wmi_attribute(priv, guid, instance, method_id, + invert, scale, &int_val); + *value = int_val; + return err; +} + +static int set_simple_wmi_attribute(struct legion_private *priv, + const char *guid, u8 instance, + u32 method_id, bool invert, int scale, + int state) +{ + int err; + u8 in_param; + + if (scale == 0) { + pr_info("Scale cannot be 0\n"); + return -EINVAL; + } + + if (invert) + state = !state; + + in_param = state / scale; + + err = wmi_exec_arg(guid, instance, method_id, &in_param, + sizeof(in_param)); + return err; +} + +/* ============================= */ +/* Sensor value reading/writing */ +/* ============================= */ + +static int ec_read_sensor_values(struct ecram *ecram, + const struct model_config *model, + struct sensor_values *values) +{ + values->fan1_target_rpm = + 100 * ecram_read(ecram, model->registers->EXT_FAN1_TARGET_RPM); + values->fan2_target_rpm = + 100 * ecram_read(ecram, model->registers->EXT_FAN2_TARGET_RPM); + + values->fan1_rpm = + ecram_read(ecram, model->registers->EXT_FAN1_RPM_LSB) + + (((int)ecram_read(ecram, model->registers->EXT_FAN1_RPM_MSB)) + << 8); + values->fan2_rpm = + ecram_read(ecram, model->registers->EXT_FAN2_RPM_LSB) + + (((int)ecram_read(ecram, model->registers->EXT_FAN2_RPM_MSB)) + << 8); + + values->cpu_temp_celsius = + ecram_read(ecram, model->registers->EXT_CPU_TEMP_INPUT); + values->gpu_temp_celsius = + ecram_read(ecram, model->registers->EXT_GPU_TEMP_INPUT); + values->ic_temp_celsius = + ecram_read(ecram, model->registers->EXT_IC_TEMP_INPUT); + + values->cpu_temp_celsius = ecram_read(ecram, 0xC5E6); + values->gpu_temp_celsius = ecram_read(ecram, 0xC5E7); + values->ic_temp_celsius = ecram_read(ecram, 0xC5E8); + + return 0; +} + +static ssize_t ec_read_temperature(struct ecram *ecram, + 
const struct model_config *model, + int sensor_id, int *temperature) +{ + int err = 0; + unsigned long res; + + if (sensor_id == 0) { + res = ecram_read(ecram, 0xC5E6); + } else if (sensor_id == 1) { + res = ecram_read(ecram, 0xC5E7); + } else { + // TODO: use all correct error codes + return -EEXIST; + } + if (!err) + *temperature = res; + return err; +} + +static ssize_t ec_read_fanspeed(struct ecram *ecram, + const struct model_config *model, int fan_id, + int *fanspeed_rpm) +{ + int err = 0; + unsigned long res; + + if (fan_id == 0) { + res = ecram_read(ecram, model->registers->EXT_FAN1_RPM_LSB) + + (((int)ecram_read(ecram, + model->registers->EXT_FAN1_RPM_MSB)) + << 8); + } else if (fan_id == 1) { + res = ecram_read(ecram, model->registers->EXT_FAN2_RPM_LSB) + + (((int)ecram_read(ecram, + model->registers->EXT_FAN2_RPM_MSB)) + << 8); + } else { + // TODO: use all correct error codes + return -EEXIST; + } + if (!err) + *fanspeed_rpm = res; + return err; +} + +// '\_SB.PCI0.LPC0.EC0.FANS +#define ACPI_PATH_FAN_SPEED1 "FANS" +// '\_SB.PCI0.LPC0.EC0.FA2S +#define ACPI_PATH_FAN_SPEED2 "FA2S" + +static ssize_t acpi_read_fanspeed(struct legion_private *priv, int fan_id, + int *value) +{ + int err; + unsigned long acpi_value; + const char *acpi_path; + + if (fan_id == 0) { + acpi_path = ACPI_PATH_FAN_SPEED1; + } else if (fan_id == 1) { + acpi_path = ACPI_PATH_FAN_SPEED2; + } else { + // TODO: use all correct error codes + return -EEXIST; + } + err = eval_int(priv->adev->handle, acpi_path, &acpi_value); + if (!err) + *value = (int)acpi_value * 100; + return err; +} + +// '\_SB.PCI0.LPC0.EC0.CPUT +#define ACPI_PATH_CPU_TEMP "CPUT" +// '\_SB.PCI0.LPC0.EC0.GPUT +#define ACPI_PATH_GPU_TEMP "GPUT" + +static ssize_t acpi_read_temperature(struct legion_private *priv, int fan_id, + int *value) +{ + int err; + unsigned long acpi_value; + const char *acpi_path; + + if (fan_id == 0) { + acpi_path = ACPI_PATH_CPU_TEMP; + } else if (fan_id == 1) { + acpi_path = ACPI_PATH_GPU_TEMP; + } else { + // TODO: use all correct error codes + return -EEXIST; + } + err = eval_int(priv->adev->handle, acpi_path, &acpi_value); + if (!err) + *value = (int)acpi_value; + return err; +} + +// fan_id: 0 or 1 +static ssize_t wmi_read_fanspeed(int fan_id, int *fanspeed_rpm) +{ + int err; + unsigned long res; + struct acpi_buffer params; + + params.length = 1; + params.pointer = &fan_id; + + err = wmi_exec_int(WMI_GUID_LENOVO_FAN_METHOD, 0, + WMI_METHOD_ID_FAN_GETCURRENTFANSPEED, ¶ms, &res); + + if (!err) + *fanspeed_rpm = res; + return err; +} + +//sensor_id: cpu = 0, gpu = 1 +static ssize_t wmi_read_temperature(int sensor_id, int *temperature) +{ + int err; + unsigned long res; + struct acpi_buffer params; + + if (sensor_id == 0) + sensor_id = 0x03; + else if (sensor_id == 1) + sensor_id = 0x04; + else { + // TODO: use all correct error codes + return -EEXIST; + } + + params.length = 1; + params.pointer = &sensor_id; + + err = wmi_exec_int(WMI_GUID_LENOVO_FAN_METHOD, 0, + WMI_METHOD_ID_FAN_GETCURRENTSENSORTEMPERATURE, + ¶ms, &res); + + if (!err) + *temperature = res; + return err; +} + +// fan_id: 0 or 1 +static ssize_t wmi_read_fanspeed_gz(int fan_id, int *fanspeed_rpm) +{ + int err; + u32 method_id; + unsigned long res; + + if (fan_id == 0) + method_id = WMI_METHOD_ID_GETFAN1SPEED; + else if (fan_id == 1) + method_id = WMI_METHOD_ID_GETFAN2SPEED; + else { + // TODO: use all correct error codes + return -EEXIST; + } + err = wmi_exec_noarg_int(LEGION_WMI_GAMEZONE_GUID, 0, method_id, &res); + + if (!err) + *fanspeed_rpm = res; 
+ return err; +} + +//sensor_id: cpu = 0, gpu = 1 +static ssize_t wmi_read_temperature_gz(int sensor_id, int *temperature) +{ + int err; + u32 method_id; + unsigned long res; + + if (sensor_id == 0) + method_id = WMI_METHOD_ID_GETCPUTEMP; + else if (sensor_id == 1) + method_id = WMI_METHOD_ID_GETGPUTEMP; + else { + // TODO: use all correct error codes + return -EEXIST; + } + + err = wmi_exec_noarg_int(LEGION_WMI_GAMEZONE_GUID, 0, method_id, &res); + + if (!err) + *temperature = res; + return err; +} + +// fan_id: 0 or 1 +static ssize_t wmi_read_fanspeed_other(int fan_id, int *fanspeed_rpm) +{ + int err; + enum OtherMethodFeature featured_id; + int res; + + if (fan_id == 0) + featured_id = OtherMethodFeature_FAN_SPEED_1; + else if (fan_id == 1) + featured_id = OtherMethodFeature_FAN_SPEED_2; + else { + // TODO: use all correct error codes + return -EEXIST; + } + + err = wmi_other_method_get_value(featured_id, &res); + + if (!err) + *fanspeed_rpm = res; + return err; +} + +//sensor_id: cpu = 0, gpu = 1 +static ssize_t wmi_read_temperature_other(int sensor_id, int *temperature) +{ + int err; + enum OtherMethodFeature featured_id; + int res; + + if (sensor_id == 0) + featured_id = OtherMethodFeature_TEMP_CPU; + else if (sensor_id == 1) + featured_id = OtherMethodFeature_TEMP_GPU; + else { + // TODO: use all correct error codes + return -EEXIST; + } + + err = wmi_other_method_get_value(featured_id, &res); + if (!err) + *temperature = res; + return err; +} + +static ssize_t read_fanspeed(struct legion_private *priv, int fan_id, + int *speed_rpm) +{ + // TODO: use enums or function pointers? + switch (priv->conf->access_method_fanspeed) { + case ACCESS_METHOD_EC: + return ec_read_fanspeed(&priv->ecram, priv->conf, fan_id, + speed_rpm); + case ACCESS_METHOD_ACPI: + return acpi_read_fanspeed(priv, fan_id, speed_rpm); + case ACCESS_METHOD_WMI: + return wmi_read_fanspeed_gz(fan_id, speed_rpm); + case ACCESS_METHOD_WMI2: + return wmi_read_fanspeed(fan_id, speed_rpm); + case ACCESS_METHOD_WMI3: + return wmi_read_fanspeed_other(fan_id, speed_rpm); + default: + pr_info("No access method for fanspeed: %d\n", + priv->conf->access_method_fanspeed); + return -EINVAL; + } +} + +static ssize_t read_temperature(struct legion_private *priv, int sensor_id, + int *temperature) +{ + // TODO: use enums or function pointers? + switch (priv->conf->access_method_temperature) { + case ACCESS_METHOD_EC: + return ec_read_temperature(&priv->ecram, priv->conf, sensor_id, + temperature); + case ACCESS_METHOD_ACPI: + return acpi_read_temperature(priv, sensor_id, temperature); + case ACCESS_METHOD_WMI: + return wmi_read_temperature_gz(sensor_id, temperature); + case ACCESS_METHOD_WMI2: + return wmi_read_temperature(sensor_id, temperature); + case ACCESS_METHOD_WMI3: + return wmi_read_temperature_other(sensor_id, temperature); + default: + pr_info("No access method for temperature: %d\n", + priv->conf->access_method_temperature); + return -EINVAL; + } +} + +/* ============================= */ +/* Fancurve reading/writing */ +/* ============================= */ + +/* Fancurve from WMI + * This allows changing fewer parameters. + * It is only available on newer models. 
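+ * Judging by the helpers below, only the ten fan speed values
+ * (FSS0 to FSS9) are exposed through it; the temperature points
+ * remain under firmware control.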
+ */ + +struct WMIFanTable { + u8 FSTM; //FSMD + u8 FSID; + u32 FSTL; //FSST + u16 FSS0; + u16 FSS1; + u16 FSS2; + u16 FSS3; + u16 FSS4; + u16 FSS5; + u16 FSS6; + u16 FSS7; + u16 FSS8; + u16 FSS9; +} __packed; + +struct WMIFanTableRead { + u32 FSFL; + u32 FSS0; + u32 FSS1; + u32 FSS2; + u32 FSS3; + u32 FSS4; + u32 FSS5; + u32 FSS6; + u32 FSS7; + u32 FSS8; + u32 FSS9; + u32 FSSA; +} __packed; + +static ssize_t wmi_read_fancurve_custom(const struct model_config *model, + struct fancurve *fancurve) +{ + u8 buffer[88]; + int err; + + // The output buffer from the ACPI call is 88 bytes and larger + // than the returned object + pr_info("Size of object: %lu\n", sizeof(struct WMIFanTableRead)); + err = wmi_exec_noarg_ints(WMI_GUID_LENOVO_FAN_METHOD, 0, + WMI_METHOD_ID_FAN_GET_TABLE, buffer, + sizeof(buffer)); + print_hex_dump(KERN_INFO, "legion_laptop fan table wmi buffer", + DUMP_PREFIX_ADDRESS, 16, 1, buffer, sizeof(buffer), + true); + if (!err) { + struct WMIFanTableRead *fantable = + (struct WMIFanTableRead *)&buffer[0]; + fancurve->current_point_i = 0; + fancurve->size = 10; + fancurve->points[0].rpm1_raw = fantable->FSS0; + fancurve->points[1].rpm1_raw = fantable->FSS1; + fancurve->points[2].rpm1_raw = fantable->FSS2; + fancurve->points[3].rpm1_raw = fantable->FSS3; + fancurve->points[4].rpm1_raw = fantable->FSS4; + fancurve->points[5].rpm1_raw = fantable->FSS5; + fancurve->points[6].rpm1_raw = fantable->FSS6; + fancurve->points[7].rpm1_raw = fantable->FSS7; + fancurve->points[8].rpm1_raw = fantable->FSS8; + fancurve->points[9].rpm1_raw = fantable->FSS9; + //fancurve->points[10].rpm1_raw = fantable->FSSA; + } + return err; +} + +static ssize_t wmi_write_fancurve_custom(const struct model_config *model, + const struct fancurve *fancurve) +{ + u8 buffer[0x20]; + int err; + + // The buffer is read like this in ACPI firmware + // + // CreateByteField (Arg2, Zero, FSTM) + // CreateByteField (Arg2, One, FSID) + // CreateDWordField (Arg2, 0x02, FSTL) + // CreateByteField (Arg2, 0x06, FSS0) + // CreateByteField (Arg2, 0x08, FSS1) + // CreateByteField (Arg2, 0x0A, FSS2) + // CreateByteField (Arg2, 0x0C, FSS3) + // CreateByteField (Arg2, 0x0E, FSS4) + // CreateByteField (Arg2, 0x10, FSS5) + // CreateByteField (Arg2, 0x12, FSS6) + // CreateByteField (Arg2, 0x14, FSS7) + // CreateByteField (Arg2, 0x16, FSS8) + // CreateByteField (Arg2, 0x18, FSS9) + + memset(buffer, 0, sizeof(buffer)); + buffer[0x06] = fancurve->points[0].rpm1_raw; + buffer[0x08] = fancurve->points[1].rpm1_raw; + buffer[0x0A] = fancurve->points[2].rpm1_raw; + buffer[0x0C] = fancurve->points[3].rpm1_raw; + buffer[0x0E] = fancurve->points[4].rpm1_raw; + buffer[0x10] = fancurve->points[5].rpm1_raw; + buffer[0x12] = fancurve->points[6].rpm1_raw; + buffer[0x14] = fancurve->points[7].rpm1_raw; + buffer[0x16] = fancurve->points[8].rpm1_raw; + buffer[0x18] = fancurve->points[9].rpm1_raw; + + print_hex_dump(KERN_INFO, "legion_laptop fan table wmi write buffer", + DUMP_PREFIX_ADDRESS, 16, 1, buffer, sizeof(buffer), + true); + err = wmi_exec_arg(WMI_GUID_LENOVO_FAN_METHOD, 0, + WMI_METHOD_ID_FAN_SET_TABLE, buffer, sizeof(buffer)); + return err; +} + +/* Read the fan curve from the EC. + * + * In newer models (>=2022) there is an ACPI/WMI to read fan curve as + * a whole. So read/write fan table as a whole to use the + * same interface for both cases. + * + * It reads all points from EC memory, even if stored fancurve is smaller, so + * it can contain 0 entries. 
+ */ +static int ec_read_fancurve_legion(struct ecram *ecram, + const struct model_config *model, + struct fancurve *fancurve) +{ + size_t i = 0; + + for (i = 0; i < MAXFANCURVESIZE; ++i) { + struct fancurve_point *point = &fancurve->points[i]; + + point->rpm1_raw = + ecram_read(ecram, model->registers->EXT_FAN1_BASE + i); + point->rpm2_raw = + ecram_read(ecram, model->registers->EXT_FAN2_BASE + i); + + point->accel = ecram_read( + ecram, model->registers->EXT_FAN_ACC_BASE + i); + point->decel = ecram_read( + ecram, model->registers->EXT_FAN_DEC_BASE + i); + point->cpu_max_temp_celsius = + ecram_read(ecram, model->registers->EXT_CPU_TEMP + i); + point->cpu_min_temp_celsius = ecram_read( + ecram, model->registers->EXT_CPU_TEMP_HYST + i); + point->gpu_max_temp_celsius = + ecram_read(ecram, model->registers->EXT_GPU_TEMP + i); + point->gpu_min_temp_celsius = ecram_read( + ecram, model->registers->EXT_GPU_TEMP_HYST + i); + point->ic_max_temp_celsius = + ecram_read(ecram, model->registers->EXT_VRM_TEMP + i); + point->ic_min_temp_celsius = ecram_read( + ecram, model->registers->EXT_VRM_TEMP_HYST + i); + } + + // Do not trust that hardware; It might suddenly report + // a larger size, so clamp it. + fancurve->size = + ecram_read(ecram, model->registers->EXT_FAN_POINTS_SIZE); + fancurve->size = + min(fancurve->size, (typeof(fancurve->size))(MAXFANCURVESIZE)); + fancurve->current_point_i = + ecram_read(ecram, model->registers->EXT_FAN_CUR_POINT); + fancurve->current_point_i = + min(fancurve->current_point_i, fancurve->size); + return 0; +} + +static int ec_write_fancurve_legion(struct ecram *ecram, + const struct model_config *model, + const struct fancurve *fancurve, + bool write_size) +{ + size_t i; + + // Reset fan update counters (try to avoid any race conditions) + ecram_write(ecram, 0xC5FE, 0); + ecram_write(ecram, 0xC5FF, 0); + for (i = 0; i < MAXFANCURVESIZE; ++i) { + // Entries for points larger than fancurve size should be cleared + // to 0 + const struct fancurve_point *point = + i < fancurve->size ? 
&fancurve->points[i] : + &fancurve_point_zero; + + ecram_write(ecram, model->registers->EXT_FAN1_BASE + i, + point->rpm1_raw); + ecram_write(ecram, model->registers->EXT_FAN2_BASE + i, + point->rpm2_raw); + + ecram_write(ecram, model->registers->EXT_FAN_ACC_BASE + i, + point->accel); + ecram_write(ecram, model->registers->EXT_FAN_DEC_BASE + i, + point->decel); + + ecram_write(ecram, model->registers->EXT_CPU_TEMP + i, + point->cpu_max_temp_celsius); + ecram_write(ecram, model->registers->EXT_CPU_TEMP_HYST + i, + point->cpu_min_temp_celsius); + ecram_write(ecram, model->registers->EXT_GPU_TEMP + i, + point->gpu_max_temp_celsius); + ecram_write(ecram, model->registers->EXT_GPU_TEMP_HYST + i, + point->gpu_min_temp_celsius); + ecram_write(ecram, model->registers->EXT_VRM_TEMP + i, + point->ic_max_temp_celsius); + ecram_write(ecram, model->registers->EXT_VRM_TEMP_HYST + i, + point->ic_min_temp_celsius); + } + + if (write_size) { + ecram_write(ecram, model->registers->EXT_FAN_POINTS_SIZE, + fancurve->size); + } + + // Reset current fan level to 0, so algorithm in EC + // selects fan curve point again and resetting hysterisis + // effects + ecram_write(ecram, model->registers->EXT_FAN_CUR_POINT, 0); + + // Reset internal fan levels + ecram_write(ecram, 0xC634, 0); // CPU + ecram_write(ecram, 0xC635, 0); // GPU + ecram_write(ecram, 0xC636, 0); // SENSOR + + return 0; +} + +#define FANCURVESIZE_IDEAPDAD 8 + +static int ec_read_fancurve_ideapad(struct ecram *ecram, + const struct model_config *model, + struct fancurve *fancurve) +{ + size_t i = 0; + + for (i = 0; i < FANCURVESIZE_IDEAPDAD; ++i) { + struct fancurve_point *point = &fancurve->points[i]; + + point->rpm1_raw = + ecram_read(ecram, model->registers->EXT_FAN1_BASE + i); + point->rpm2_raw = + ecram_read(ecram, model->registers->EXT_FAN2_BASE + i); + + point->accel = 0; + point->decel = 0; + point->cpu_max_temp_celsius = + ecram_read(ecram, model->registers->EXT_CPU_TEMP + i); + point->cpu_min_temp_celsius = ecram_read( + ecram, model->registers->EXT_CPU_TEMP_HYST + i); + point->gpu_max_temp_celsius = + ecram_read(ecram, model->registers->EXT_GPU_TEMP + i); + point->gpu_min_temp_celsius = ecram_read( + ecram, model->registers->EXT_GPU_TEMP_HYST + i); + point->ic_max_temp_celsius = 0; + point->ic_min_temp_celsius = 0; + } + + // Do not trust that hardware; It might suddenly report + // a larger size, so clamp it. 
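+	// (For this ideapad-style layout the number of points is fixed,
+	// so only the current point index read below needs clamping.)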
+ fancurve->size = FANCURVESIZE_IDEAPDAD; + fancurve->current_point_i = + ecram_read(ecram, model->registers->EXT_FAN_CUR_POINT); + fancurve->current_point_i = + min(fancurve->current_point_i, fancurve->size); + return 0; +} + +static int ec_write_fancurve_ideapad(struct ecram *ecram, + const struct model_config *model, + const struct fancurve *fancurve) +{ + size_t i; + int valr1; + int valr2; + + // add this later: maybe other addresses needed + // therefore, fan curve might not be effective immediately but + // only after temp change + // Reset fan update counters (try to avoid any race conditions) + ecram_write(ecram, 0xC5FE, 0); + ecram_write(ecram, 0xC5FF, 0); + for (i = 0; i < FANCURVESIZE_IDEAPDAD; ++i) { + const struct fancurve_point *point = &fancurve->points[i]; + + ecram_write(ecram, model->registers->EXT_FAN1_BASE + i, + point->rpm1_raw); + valr1 = ecram_read(ecram, model->registers->EXT_FAN1_BASE + i); + ecram_write(ecram, model->registers->EXT_FAN2_BASE + i, + point->rpm2_raw); + valr2 = ecram_read(ecram, model->registers->EXT_FAN2_BASE + i); + pr_info("Writing fan1: %d; reading fan1: %d\n", point->rpm1_raw, + valr1); + pr_info("Writing fan2: %d; reading fan2: %d\n", point->rpm2_raw, + valr2); + + // write to memory and repeat 8 bytes later again + ecram_write(ecram, model->registers->EXT_CPU_TEMP + i, + point->cpu_max_temp_celsius); + ecram_write(ecram, model->registers->EXT_CPU_TEMP + 8 + i, + point->cpu_max_temp_celsius); + // write to memory and repeat 8 bytes later again + ecram_write(ecram, model->registers->EXT_CPU_TEMP_HYST + i, + point->cpu_min_temp_celsius); + ecram_write(ecram, model->registers->EXT_CPU_TEMP_HYST + 8 + i, + point->cpu_min_temp_celsius); + // write to memory and repeat 8 bytes later again + ecram_write(ecram, model->registers->EXT_GPU_TEMP + i, + point->gpu_max_temp_celsius); + ecram_write(ecram, model->registers->EXT_GPU_TEMP + 8 + i, + point->gpu_max_temp_celsius); + // write to memory and repeat 8 bytes later again + ecram_write(ecram, model->registers->EXT_GPU_TEMP_HYST + i, + point->gpu_min_temp_celsius); + ecram_write(ecram, model->registers->EXT_GPU_TEMP_HYST + 8 + i, + point->gpu_min_temp_celsius); + } + + // add this later: maybe other addresses needed + // therefore, fan curve might not be effective immediately but + // only after temp change + // // Reset current fan level to 0, so algorithm in EC + // // selects fan curve point again and resetting hysterisis + // // effects + // ecram_write(ecram, model->registers->EXT_FAN_CUR_POINT, 0); + + // // Reset internal fan levels + // ecram_write(ecram, 0xC634, 0); // CPU + // ecram_write(ecram, 0xC635, 0); // GPU + // ecram_write(ecram, 0xC636, 0); // SENSOR + + return 0; +} + +#define FANCURVESIZE_LOQ 10 + +static int ec_read_fancurve_loq(struct ecram *ecram, + const struct model_config *model, + struct fancurve *fancurve) +{ + size_t i = 0; + size_t struct_offset = 3; // {cpu_temp: u8, rpm: u8, gpu_temp?: u8} + + for (i = 0; i < FANCURVESIZE_LOQ; ++i) { + struct fancurve_point *point = &fancurve->points[i]; + + point->rpm1_raw = + ecram_read(ecram, model->registers->EXT_FAN1_BASE + (i * struct_offset)); + point->rpm2_raw = + ecram_read(ecram, model->registers->EXT_FAN2_BASE + (i * struct_offset)); + + point->accel = 0; + point->decel = 0; + point->cpu_max_temp_celsius = + ecram_read(ecram, model->registers->EXT_CPU_TEMP + (i * struct_offset)); + point->gpu_max_temp_celsius = + ecram_read(ecram, model->registers->EXT_GPU_TEMP + (i * struct_offset)); + point->cpu_min_temp_celsius = 0; + 
point->gpu_min_temp_celsius = 0; + point->ic_max_temp_celsius = 0; + point->ic_min_temp_celsius = 0; + } + + fancurve->size = FANCURVESIZE_LOQ; + fancurve->current_point_i = + ecram_read(ecram, model->registers->EXT_FAN_CUR_POINT); + fancurve->current_point_i = + min(fancurve->current_point_i, fancurve->size); + return 0; +} + +static int ec_write_fancurve_loq(struct ecram *ecram, + const struct model_config *model, + const struct fancurve *fancurve) +{ + size_t i; + int valr1; + int valr2; + size_t struct_offset = 3; // {cpu_temp: u8, rpm: u8, gpu_temp?: u8} + + for (i = 0; i < FANCURVESIZE_LOQ; ++i) { + const struct fancurve_point *point = &fancurve->points[i]; + + ecram_write(ecram, model->registers->EXT_FAN1_BASE + (i * struct_offset), + point->rpm1_raw); + valr1 = ecram_read(ecram, model->registers->EXT_FAN1_BASE + (i * struct_offset)); + ecram_write(ecram, model->registers->EXT_FAN2_BASE + (i * struct_offset), + point->rpm2_raw); + valr2 = ecram_read(ecram, model->registers->EXT_FAN2_BASE + (i * struct_offset)); + pr_info("Writing fan1: %d; reading fan1: %d\n", point->rpm1_raw, + valr1); + pr_info("Writing fan2: %d; reading fan2: %d\n", point->rpm2_raw, + valr2); + + // write to memory and repeat 8 bytes later again + ecram_write(ecram, model->registers->EXT_CPU_TEMP + (i * struct_offset), + point->cpu_max_temp_celsius); + // write to memory and repeat 8 bytes later again + ecram_write(ecram, model->registers->EXT_GPU_TEMP + (i * struct_offset), + point->gpu_max_temp_celsius); + } + + return 0; +} + +static int read_fancurve(struct legion_private *priv, struct fancurve *fancurve) +{ + // TODO: use enums or function pointers? + switch (priv->conf->access_method_fancurve) { + case ACCESS_METHOD_EC: + return ec_read_fancurve_legion(&priv->ecram, priv->conf, + fancurve); + case ACCESS_METHOD_EC2: + return ec_read_fancurve_ideapad(&priv->ecram, priv->conf, + fancurve); + case ACCESS_METHOD_EC3: + return ec_read_fancurve_loq(&priv->ecram, priv->conf, + fancurve); + case ACCESS_METHOD_WMI3: + return wmi_read_fancurve_custom(priv->conf, fancurve); + default: + pr_info("No access method for fancurve: %d\n", + priv->conf->access_method_fancurve); + return -EINVAL; + } +} + +static int write_fancurve(struct legion_private *priv, + const struct fancurve *fancurve, bool write_size) +{ + // TODO: use enums or function pointers? 
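+	// Dispatch on the model's configured access method: the EC register
+	// layout differs between Legion (EC), ideapad-style (EC2) and LOQ
+	// (EC3) models, while some models expose the fan table via WMI (WMI3).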
+ switch (priv->conf->access_method_fancurve) { + case ACCESS_METHOD_EC: + return ec_write_fancurve_legion(&priv->ecram, priv->conf, + fancurve, write_size); + case ACCESS_METHOD_EC2: + return ec_write_fancurve_ideapad(&priv->ecram, priv->conf, + fancurve); + case ACCESS_METHOD_EC3: + return ec_write_fancurve_loq(&priv->ecram, priv->conf, + fancurve); + case ACCESS_METHOD_WMI3: + return wmi_write_fancurve_custom(priv->conf, fancurve); + default: + pr_info("No access method for fancurve: %d\n", + priv->conf->access_method_fancurve); + return -EINVAL; + } +} + +#define MINIFANCUVE_ON_COOL_ON 0x04 +#define MINIFANCUVE_ON_COOL_OFF 0xA0 + +static int ec_read_minifancurve(struct ecram *ecram, + const struct model_config *model, bool *state) +{ + int value = + ecram_read(ecram, model->registers->EXT_MINIFANCURVE_ON_COOL); + + switch (value) { + case MINIFANCUVE_ON_COOL_ON: + *state = true; + break; + case MINIFANCUVE_ON_COOL_OFF: + *state = false; + break; + default: + pr_info("Unexpected value in MINIFANCURVE register: %d\n", + value); + return -1; + } + return 0; +} + +static ssize_t ec_write_minifancurve(struct ecram *ecram, + const struct model_config *model, + bool state) +{ + u8 val = state ? MINIFANCUVE_ON_COOL_ON : MINIFANCUVE_ON_COOL_OFF; + + ecram_write(ecram, model->registers->EXT_MINIFANCURVE_ON_COOL, val); + return 0; +} + +#define EC_LOCKFANCONTROLLER_ON 8 +#define EC_LOCKFANCONTROLLER_OFF 0 + +static ssize_t ec_write_lockfancontroller(struct ecram *ecram, + const struct model_config *model, + bool state) +{ + u8 val = state ? EC_LOCKFANCONTROLLER_ON : EC_LOCKFANCONTROLLER_OFF; + + ecram_write(ecram, model->registers->EXT_LOCKFANCONTROLLER, val); + return 0; +} + +static int ec_read_lockfancontroller(struct ecram *ecram, + const struct model_config *model, + bool *state) +{ + int value = ecram_read(ecram, model->registers->EXT_LOCKFANCONTROLLER); + + switch (value) { + case EC_LOCKFANCONTROLLER_ON: + *state = true; + break; + case EC_LOCKFANCONTROLLER_OFF: + *state = false; + break; + default: + pr_info("Unexpected value in lockfanspeed register: %d\n", + value); + return -1; + } + return 0; +} + +#define EC_FANFULLSPEED_ON 0x40 +#define EC_FANFULLSPEED_OFF 0x00 + +static int ec_read_fanfullspeed(struct ecram *ecram, + const struct model_config *model, bool *state) +{ + int value = ecram_read(ecram, model->registers->EXT_MAXIMUMFANSPEED); + + switch (value) { + case EC_FANFULLSPEED_ON: + *state = true; + break; + case EC_FANFULLSPEED_OFF: + *state = false; + break; + default: + pr_info("Unexpected value in maximumfanspeed register: %d\n", + value); + return -1; + } + return 0; +} + +static ssize_t ec_write_fanfullspeed(struct ecram *ecram, + const struct model_config *model, + bool state) +{ + u8 val = state ? EC_FANFULLSPEED_ON : EC_FANFULLSPEED_OFF; + + ecram_write(ecram, model->registers->EXT_MAXIMUMFANSPEED, val); + return 0; +} + +static ssize_t wmi_read_fanfullspeed(struct legion_private *priv, bool *state) +{ + return get_simple_wmi_attribute_bool(priv, WMI_GUID_LENOVO_FAN_METHOD, + 0, WMI_METHOD_ID_FAN_GET_FULLSPEED, + false, 1, state); +} + +static ssize_t wmi_write_fanfullspeed(struct legion_private *priv, bool state) +{ + return set_simple_wmi_attribute(priv, WMI_GUID_LENOVO_FAN_METHOD, 0, + WMI_METHOD_ID_FAN_SET_FULLSPEED, false, + 1, state); +} + +static ssize_t read_fanfullspeed(struct legion_private *priv, bool *state) +{ + // TODO: use enums or function pointers? 
+ switch (priv->conf->access_method_fanfullspeed) { + case ACCESS_METHOD_EC: + return ec_read_fanfullspeed(&priv->ecram, priv->conf, state); + case ACCESS_METHOD_WMI: + return wmi_read_fanfullspeed(priv, state); + default: + pr_info("No access method for fan full speed: %d\n", + priv->conf->access_method_fanfullspeed); + return -EINVAL; + } +} + +static ssize_t write_fanfullspeed(struct legion_private *priv, bool state) +{ + ssize_t res; + + switch (priv->conf->access_method_fanfullspeed) { + case ACCESS_METHOD_EC: + res = ec_write_fanfullspeed(&priv->ecram, priv->conf, state); + return res; + case ACCESS_METHOD_WMI: + return wmi_write_fanfullspeed(priv, state); + default: + pr_info("No access method for fan full speed: %d\n", + priv->conf->access_method_fanfullspeed); + return -EINVAL; + } +} + +/* ============================= */ +/* Power mode reading/writing */ +/* ============================= */ + +enum legion_ec_powermode { + LEGION_EC_POWERMODE_QUIET = 2, + LEGION_EC_POWERMODE_BALANCED = 0, + LEGION_EC_POWERMODE_PERFORMANCE = 1, + LEGION_EC_POWERMODE_CUSTOM = 3 +}; + +enum legion_wmi_powermode { + LEGION_WMI_POWERMODE_QUIET = 1, + LEGION_WMI_POWERMODE_BALANCED = 2, + LEGION_WMI_POWERMODE_PERFORMANCE = 3, + LEGION_WMI_POWERMODE_CUSTOM = 255 +}; + +enum legion_wmi_powermode ec_to_wmi_powermode(int ec_mode) +{ + switch (ec_mode) { + case LEGION_EC_POWERMODE_QUIET: + return LEGION_WMI_POWERMODE_QUIET; + case LEGION_EC_POWERMODE_BALANCED: + return LEGION_WMI_POWERMODE_BALANCED; + case LEGION_EC_POWERMODE_PERFORMANCE: + return LEGION_WMI_POWERMODE_PERFORMANCE; + case LEGION_EC_POWERMODE_CUSTOM: + return LEGION_WMI_POWERMODE_CUSTOM; + default: + return LEGION_WMI_POWERMODE_BALANCED; + } +} + +enum legion_ec_powermode wmi_to_ec_powermode(enum legion_wmi_powermode wmi_mode) +{ + switch (wmi_mode) { + case LEGION_WMI_POWERMODE_QUIET: + return LEGION_EC_POWERMODE_QUIET; + case LEGION_WMI_POWERMODE_BALANCED: + return LEGION_EC_POWERMODE_BALANCED; + case LEGION_WMI_POWERMODE_PERFORMANCE: + return LEGION_EC_POWERMODE_PERFORMANCE; + case LEGION_WMI_POWERMODE_CUSTOM: + return LEGION_EC_POWERMODE_CUSTOM; + default: + return LEGION_EC_POWERMODE_BALANCED; + } +} + +static ssize_t ec_read_powermode(struct legion_private *priv, int *powermode) +{ + *powermode = + ecram_read(&priv->ecram, priv->conf->registers->EXT_POWERMODE); + return 0; +} + +static ssize_t ec_write_powermode(struct legion_private *priv, u8 value) +{ + if (!((value >= 0 && value <= 2) || value == 255)) { + pr_info("Unexpected power mode value ignored: %d\n", value); + return -ENOMEM; + } + ecram_write(&priv->ecram, priv->conf->registers->EXT_POWERMODE, value); + return 0; +} + +static ssize_t acpi_read_powermode(struct legion_private *priv, int *powermode) +{ + unsigned long acpi_powermode; + int err; + + // spmo method not always available + // \_SB.PCI0.LPC0.EC0.SPMO + err = eval_spmo(priv->adev->handle, &acpi_powermode); + *powermode = (int)acpi_powermode; + return err; +} + +static ssize_t wmi_read_powermode(int *powermode) +{ + int err; + unsigned long res; + + err = wmi_exec_noarg_int(LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_GETSMARTFANMODE, &res); + + if (!err) + *powermode = res; + return err; +} + +static ssize_t wmi_write_powermode(u8 value) +{ + if (!((value >= LEGION_WMI_POWERMODE_QUIET && + value <= LEGION_WMI_POWERMODE_PERFORMANCE) || + value == LEGION_WMI_POWERMODE_CUSTOM)) { + pr_info("Unexpected power mode value ignored: %d\n", value); + return -ENOMEM; + } + return wmi_exec_arg(LEGION_WMI_GAMEZONE_GUID, 0, + 
WMI_METHOD_ID_SETSMARTFANMODE, &value, + sizeof(value)); +} + +static ssize_t read_powermode(struct legion_private *priv, int *powermode) +{ + ssize_t res; + + switch (priv->conf->access_method_powermode) { + case ACCESS_METHOD_EC: + res = ec_read_powermode(priv, powermode); + *powermode = ec_to_wmi_powermode(*powermode); + return res; + case ACCESS_METHOD_ACPI: + return acpi_read_powermode(priv, powermode); + case ACCESS_METHOD_WMI: + return wmi_read_powermode(powermode); + default: + pr_info("No access method for powermode: %d\n", + priv->conf->access_method_powermode); + return -EINVAL; + } +} + +static ssize_t write_powermode(struct legion_private *priv, + enum legion_wmi_powermode value) +{ + ssize_t res; + + //TODO: remove again + pr_info("Set powermode\n"); + + switch (priv->conf->access_method_powermode) { + case ACCESS_METHOD_EC: + res = ec_write_powermode(priv, wmi_to_ec_powermode(value)); + return res; + case ACCESS_METHOD_WMI: + return wmi_write_powermode(value); + default: + pr_info("No access method for powermode: %d\n", + priv->conf->access_method_powermode); + return -EINVAL; + } +} + +/** + * Shortly toggle powermode to a different mode + * and switch back, e.g. to reset fan curve. + */ +static void toggle_powermode(struct legion_private *priv) +{ + int old_powermode; + int next_powermode; + + read_powermode(priv, &old_powermode); + next_powermode = old_powermode == 0 ? 1 : 0; + + write_powermode(priv, next_powermode); + mdelay(1500); + write_powermode(priv, old_powermode); +} + +/* ============================= */ +/* Charging mode reading/writing */ +/* ============================- */ + +#define FCT_RAPID_CHARGE_ON 0x07 +#define FCT_RAPID_CHARGE_OFF 0x08 +#define RAPID_CHARGE_ON 0x0 +#define RAPID_CHARGE_OFF 0x1 + +static int acpi_read_rapidcharge(struct acpi_device *adev, bool *state) +{ + unsigned long result; + int err; + + //also works? which one is better? + /* + * err = eval_qcho(adev->handle, &result); + * if (err) + * return err; + * state = result; + * return 0; + */ + + err = eval_gbmd(adev->handle, &result); + if (err) + return err; + + *state = result & 0x04; + return 0; +} + +static int acpi_write_rapidcharge(struct acpi_device *adev, bool state) +{ + int err; + unsigned long fct_nr = state > 0 ? 
FCT_RAPID_CHARGE_ON : + FCT_RAPID_CHARGE_OFF; + + err = exec_sbmc(adev->handle, fct_nr); + pr_info("Set rapidcharge to %d by calling %lu: result: %d\n", state, + fct_nr, err); + return err; +} + +/* ============================= */ +/* Keyboard backlight read/write */ +/* ============================= */ + +static ssize_t legion_kbd_bl2_brightness_get(struct legion_private *priv) +{ + unsigned long state = 0; + int err; + + err = wmi_exec_noarg_int(LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_GETKEYBOARDLIGHT, &state); + if (err) + return -EINVAL; + + return state; +} + +//static int legion_kbd_bl2_brightness_set(struct legion_private *priv, +// unsigned int brightness) +//{ +// u8 in_param = brightness; + +// return wmi_exec_arg(LEGION_WMI_GAMEZONE_GUID, 0, +// WMI_METHOD_ID_SETKEYBOARDLIGHT, &in_param, +// sizeof(in_param)); +//} + +//min: 1, max: 3 +#define LIGHT_ID_KEYBOARD 0x00 +//min: 0, max: 1 +#define LIGHT_ID_YLOGO 0x03 +//min: 1, max: 2 +#define LIGHT_ID_IOPORT 0x05 + +static int legion_wmi_light_get(struct legion_private *priv, u8 light_id, + unsigned int min_value, unsigned int max_value) +{ + struct acpi_buffer params; + u8 in; + u8 result[2]; + u8 value; + int err; + + params.length = 1; + params.pointer = ∈ + in = light_id; + err = wmi_exec_ints(LEGION_WMI_KBBACKLIGHT_GUID, 0, + WMI_METHOD_ID_KBBACKLIGHTGET, ¶ms, result, + ARRAY_SIZE(result)); + if (err) { + pr_info("Error for WMI method call to get brightness\n"); + return -EIO; + } + + value = result[1]; + if (!(value >= min_value && value <= max_value)) { + pr_info("Error WMI call for reading brightness: expected a value between %u and %u, but got %d\n", + min_value, max_value, value); + return -EFAULT; + } + + return value - min_value; +} + +static int legion_wmi_light_set(struct legion_private *priv, u8 light_id, + unsigned int min_value, unsigned int max_value, + unsigned int brightness) +{ + struct acpi_buffer buffer; + u8 in_buffer_param[8]; + unsigned long result; + int err; + + buffer.length = 3; + buffer.pointer = &in_buffer_param[0]; + in_buffer_param[0] = light_id; + in_buffer_param[1] = 0x01; + in_buffer_param[2] = + clamp(brightness + min_value, min_value, max_value); + + err = wmi_exec_int(LEGION_WMI_KBBACKLIGHT_GUID, 0, + WMI_METHOD_ID_KBBACKLIGHTSET, &buffer, &result); + if (err) { + pr_info("Error for WMI method call to set brightness on light: %d\n", + light_id); + return -EIO; + } + + return 0; +} + +static int legion_kbd_bl_brightness_get(struct legion_private *priv) +{ + return legion_wmi_light_get(priv, LIGHT_ID_KEYBOARD, 1, 3); +} + +static int legion_kbd_bl_brightness_set(struct legion_private *priv, + unsigned int brightness) +{ + return legion_wmi_light_set(priv, LIGHT_ID_KEYBOARD, 1, 3, brightness); +} + +/* ============================= */ +/* debugfs interface */ +/* ============================ */ + +static int debugfs_ecmemory_show(struct seq_file *s, void *unused) +{ + struct legion_private *priv = s->private; + size_t offset; + + for (offset = 0; offset < priv->conf->memoryio_size; ++offset) { + char value = ecram_read(&priv->ecram, + priv->conf->memoryio_physical_ec_start + + offset); + + seq_write(s, &value, 1); + } + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(debugfs_ecmemory); + +static int debugfs_ecmemoryram_show(struct seq_file *s, void *unused) +{ + struct legion_private *priv = s->private; + size_t offset; + ssize_t err; + u8 value; + + for (offset = 0; offset < priv->conf->ramio_size; ++offset) { + err = ecram_memoryio_read(&priv->ec_memoryio, offset, &value); + if (!err) + 
seq_write(s, &value, 1); + else + return -EACCES; + } + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(debugfs_ecmemoryram); + +//TODO: make (almost) all methods static + +static void seq_file_print_with_error(struct seq_file *s, const char *name, + ssize_t err, int value) +{ + seq_printf(s, "%s error: %ld\n", name, err); + seq_printf(s, "%s: %d\n", name, value); +} + +static int debugfs_fancurve_show(struct seq_file *s, void *unused) +{ + struct legion_private *priv = s->private; + bool is_minifancurve; + bool is_lockfancontroller; + bool is_maximumfanspeed; + bool is_rapidcharge = false; + int powermode; + int temperature; + int fanspeed; + int err; + unsigned long cfg; + struct fancurve wmi_fancurve; + //int kb_backlight; + + mutex_lock(&priv->fancurve_mutex); + + seq_printf(s, "EC Chip ID: %x\n", read_ec_id(&priv->ecram, priv->conf)); + seq_printf(s, "EC Chip Version: %x\n", + read_ec_version(&priv->ecram, priv->conf)); + seq_printf(s, "legion_laptop features: %s\n", LEGIONFEATURES); + seq_printf(s, "legion_laptop ec_readonly: %d\n", ec_readonly); + + err = eval_int(priv->adev->handle, "VPC0._CFG", &cfg); + seq_printf(s, "ACPI CFG error: %d\n", err); + seq_printf(s, "ACPI CFG: %lu\n", cfg); + + seq_printf(s, "temperature access method: %d\n", + priv->conf->access_method_temperature); + err = read_temperature(priv, 0, &temperature); + seq_file_print_with_error(s, "CPU temperature", err, temperature); + err = ec_read_temperature(&priv->ecram, priv->conf, 0, &temperature); + seq_file_print_with_error(s, "CPU temperature EC", err, temperature); + err = acpi_read_temperature(priv, 0, &temperature); + seq_file_print_with_error(s, "CPU temperature ACPI", err, temperature); + err = wmi_read_temperature_gz(0, &temperature); + seq_file_print_with_error(s, "CPU temperature WMI", err, temperature); + err = wmi_read_temperature(0, &temperature); + seq_file_print_with_error(s, "CPU temperature WMI2", err, temperature); + err = wmi_read_temperature_other(0, &temperature); + seq_file_print_with_error(s, "CPU temperature WMI3", err, temperature); + + err = read_temperature(priv, 1, &temperature); + seq_file_print_with_error(s, "GPU temperature", err, temperature); + err = ec_read_temperature(&priv->ecram, priv->conf, 1, &temperature); + seq_file_print_with_error(s, "GPU temperature EC", err, temperature); + err = acpi_read_temperature(priv, 1, &temperature); + seq_file_print_with_error(s, "GPU temperature ACPI", err, temperature); + err = wmi_read_temperature_gz(1, &temperature); + seq_file_print_with_error(s, "GPU temperature WMI", err, temperature); + err = wmi_read_temperature(1, &temperature); + seq_file_print_with_error(s, "GPU temperature WMI2", err, temperature); + err = wmi_read_temperature_other(1, &temperature); + seq_file_print_with_error(s, "GPU temperature WMI3", err, temperature); + + seq_printf(s, "fan speed access method: %d\n", + priv->conf->access_method_fanspeed); + err = read_fanspeed(priv, 0, &fanspeed); + seq_file_print_with_error(s, "1 fanspeed", err, fanspeed); + err = ec_read_fanspeed(&priv->ecram, priv->conf, 0, &fanspeed); + seq_file_print_with_error(s, "1 fanspeed EC", err, fanspeed); + err = acpi_read_fanspeed(priv, 0, &fanspeed); + seq_file_print_with_error(s, "1 fanspeed ACPI", err, fanspeed); + err = wmi_read_fanspeed_gz(0, &fanspeed); + seq_file_print_with_error(s, "1 fanspeed WMI", err, fanspeed); + err = wmi_read_fanspeed(0, &fanspeed); + seq_file_print_with_error(s, "1 fanspeed WMI2", err, fanspeed); + err = wmi_read_fanspeed_other(0, &fanspeed); + 
seq_file_print_with_error(s, "1 fanspeed WMI3", err, fanspeed); + + err = read_fanspeed(priv, 1, &fanspeed); + seq_file_print_with_error(s, "2 fanspeed", err, fanspeed); + err = ec_read_fanspeed(&priv->ecram, priv->conf, 1, &fanspeed); + seq_file_print_with_error(s, "2 fanspeed EC", err, fanspeed); + err = acpi_read_fanspeed(priv, 1, &fanspeed); + seq_file_print_with_error(s, "2 fanspeed ACPI", err, fanspeed); + err = wmi_read_fanspeed_gz(1, &fanspeed); + seq_file_print_with_error(s, "2 fanspeed WMI", err, fanspeed); + err = wmi_read_fanspeed(1, &fanspeed); + seq_file_print_with_error(s, "2 fanspeed WMI2", err, fanspeed); + err = wmi_read_fanspeed_other(1, &fanspeed); + seq_file_print_with_error(s, "2 fanspeed WMI3", err, fanspeed); + + seq_printf(s, "powermode access method: %d\n", + priv->conf->access_method_powermode); + err = read_powermode(priv, &powermode); + seq_file_print_with_error(s, "powermode", err, powermode); + err = ec_read_powermode(priv, &powermode); + seq_file_print_with_error(s, "powermode EC", err, powermode); + err = acpi_read_powermode(priv, &powermode); + seq_file_print_with_error(s, "powermode ACPI", err, powermode); + err = wmi_read_powermode(&powermode); + seq_file_print_with_error(s, "powermode WMI", err, powermode); + seq_printf(s, "has custom powermode: %d\n", + priv->conf->has_custom_powermode); + + err = acpi_read_rapidcharge(priv->adev, &is_rapidcharge); + seq_printf(s, "ACPI rapidcharge error: %d\n", err); + seq_printf(s, "ACPI rapidcharge: %d\n", is_rapidcharge); + + seq_printf(s, "WMI backlight 2 state: %ld\n", + legion_kbd_bl2_brightness_get(priv)); + seq_printf(s, "WMI backlight 3 state: %d\n", + legion_kbd_bl_brightness_get(priv)); + + seq_printf(s, "WMI light IO port: %d\n", + legion_wmi_light_get(priv, LIGHT_ID_IOPORT, 0, 4)); + + seq_printf(s, "WMI light Y logo/lid: %d\n", + legion_wmi_light_get(priv, LIGHT_ID_YLOGO, 0, 4)); + + seq_printf(s, "EC minifancurve feature enabled: %d\n", + priv->conf->has_minifancurve); + err = ec_read_minifancurve(&priv->ecram, priv->conf, &is_minifancurve); + seq_printf(s, "EC minifancurve on cool: %s\n", + err ? "error" : (is_minifancurve ? "true" : "false")); + + err = ec_read_lockfancontroller(&priv->ecram, priv->conf, + &is_lockfancontroller); + seq_printf(s, "EC lockfancontroller error: %d\n", err); + seq_printf(s, "EC lockfancontroller: %s\n", + err ? "error" : (is_lockfancontroller ? 
"true" : "false")); + + err = read_fanfullspeed(priv, &is_maximumfanspeed); + seq_file_print_with_error(s, "fanfullspeed", err, is_maximumfanspeed); + + err = ec_read_fanfullspeed(&priv->ecram, priv->conf, + &is_maximumfanspeed); + seq_file_print_with_error(s, "fanfullspeed EC", err, + is_maximumfanspeed); + + read_fancurve(priv, &priv->fancurve); + seq_printf(s, "EC fan curve current point id: %ld\n", + priv->fancurve.current_point_i); + seq_printf(s, "EC fan curve points size: %ld\n", priv->fancurve.size); + + seq_puts(s, "Current fan curve in hardware:\n"); + fancurve_print_seqfile(&priv->fancurve, s); + seq_puts(s, "=====================\n"); + mutex_unlock(&priv->fancurve_mutex); + + seq_puts(s, "Current fan curve in hardware (WMI; might be empty)\n"); + wmi_fancurve.size = 0; + err = wmi_read_fancurve_custom(priv->conf, &wmi_fancurve); + fancurve_print_seqfile(&wmi_fancurve, s); + seq_puts(s, "=====================\n"); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(debugfs_fancurve); + +static void legion_debugfs_init(struct legion_private *priv) +{ + struct dentry *dir; + + // TODO: remove this note + // Note: like other kernel modules, do not catch errors here + // because if kernel is build without debugfs this + // will return an error but module still has to + // work, just without debugfs + // TODO: what permissions; some modules do 400 + // other do 444 + dir = debugfs_create_dir(LEGION_DRVR_SHORTNAME, NULL); + debugfs_create_file("fancurve", 0444, dir, priv, + &debugfs_fancurve_fops); + debugfs_create_file("ecmemory", 0444, dir, priv, + &debugfs_ecmemory_fops); + debugfs_create_file("ecmemoryram", 0444, dir, priv, + &debugfs_ecmemoryram_fops); + + priv->debugfs_dir = dir; +} + +static void legion_debugfs_exit(struct legion_private *priv) +{ + pr_info("Unloading legion dubugfs\n"); + // The following is does nothing if pointer is NULL + debugfs_remove_recursive(priv->debugfs_dir); + priv->debugfs_dir = NULL; + pr_info("Unloading legion dubugfs done\n"); +} + +/* ============================= */ +/* sysfs interface */ +/* ============================ */ + +static int show_simple_wmi_attribute(struct device *dev, + struct device_attribute *attr, char *buf, + const char *guid, u8 instance, + u32 method_id, bool invert, + unsigned long scale) +{ + unsigned long state = 0; + int err; + struct legion_private *priv = dev_get_drvdata(dev); + + mutex_lock(&priv->fancurve_mutex); + err = get_simple_wmi_attribute(priv, guid, instance, method_id, invert, + scale, &state); + mutex_unlock(&priv->fancurve_mutex); + + if (err) + return -EINVAL; + + return sysfs_emit(buf, "%lu\n", state); +} + +static int show_simple_wmi_attribute_from_buffer(struct device *dev, + struct device_attribute *attr, + char *buf, const char *guid, + u8 instance, u32 method_id, + size_t ressize, size_t i, + int scale) +{ + u8 res[16]; + int err; + int out; + struct legion_private *priv = dev_get_drvdata(dev); + + if (ressize > ARRAY_SIZE(res)) { + pr_info("Buffer too small for WMI result\n"); + return -EINVAL; + } + if (i >= ressize) { + pr_info("Index not within buffer size\n"); + return -EINVAL; + } + + mutex_lock(&priv->fancurve_mutex); + err = wmi_exec_noarg_ints(guid, instance, method_id, res, ressize); + mutex_unlock(&priv->fancurve_mutex); + if (err) + return -EINVAL; + + out = scale * res[i]; + return sysfs_emit(buf, "%d\n", out); +} + +static int store_simple_wmi_attribute(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count, + const char *guid, u8 instance, + u32 method_id, bool 
invert, int scale) +{ + int state; + int err; + struct legion_private *priv = dev_get_drvdata(dev); + + err = kstrtouint(buf, 0, &state); + if (err) + return err; + err = set_simple_wmi_attribute(priv, guid, instance, method_id, invert, + scale, state); + if (err) + return err; + return count; +} + +static ssize_t lockfancontroller_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct legion_private *priv = dev_get_drvdata(dev); + bool is_lockfancontroller; + int err; + + mutex_lock(&priv->fancurve_mutex); + err = ec_read_lockfancontroller(&priv->ecram, priv->conf, + &is_lockfancontroller); + mutex_unlock(&priv->fancurve_mutex); + if (err) + return -EINVAL; + + return sysfs_emit(buf, "%d\n", is_lockfancontroller); +} + +static ssize_t lockfancontroller_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct legion_private *priv = dev_get_drvdata(dev); + bool is_lockfancontroller; + int err; + + err = kstrtobool(buf, &is_lockfancontroller); + if (err) + return err; + + mutex_lock(&priv->fancurve_mutex); + err = ec_write_lockfancontroller(&priv->ecram, priv->conf, + is_lockfancontroller); + mutex_unlock(&priv->fancurve_mutex); + if (err) + return -EINVAL; + + return count; +} + +static DEVICE_ATTR_RW(lockfancontroller); + +static ssize_t rapidcharge_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + bool state = false; + int err; + struct legion_private *priv = dev_get_drvdata(dev); + + mutex_lock(&priv->fancurve_mutex); + err = acpi_read_rapidcharge(priv->adev, &state); + mutex_unlock(&priv->fancurve_mutex); + if (err) + return -EINVAL; + + return sysfs_emit(buf, "%d\n", state); +} + +static ssize_t rapidcharge_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct legion_private *priv = dev_get_drvdata(dev); + int state; + int err; + + err = kstrtouint(buf, 0, &state); + if (err) + return err; + + mutex_lock(&priv->fancurve_mutex); + err = acpi_write_rapidcharge(priv->adev, state); + mutex_unlock(&priv->fancurve_mutex); + if (err) + return -EINVAL; + + return count; +} + +static DEVICE_ATTR_RW(rapidcharge); + +static ssize_t issupportgpuoc_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_simple_wmi_attribute(dev, attr, buf, + LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_ISSUPPORTGPUOC, false, + 1); +} + +static DEVICE_ATTR_RO(issupportgpuoc); + +static ssize_t aslcodeversion_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_simple_wmi_attribute(dev, attr, buf, + LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_GETVERSION, false, 1); +} + +static DEVICE_ATTR_RO(aslcodeversion); + +static ssize_t issupportcpuoc_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_simple_wmi_attribute(dev, attr, buf, + LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_ISSUPPORTCPUOC, false, + 1); +} + +static DEVICE_ATTR_RO(issupportcpuoc); + +static ssize_t winkey_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return show_simple_wmi_attribute(dev, attr, buf, + LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_GETWINKEYSTATUS, true, + 1); +} + +static ssize_t winkey_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + return store_simple_wmi_attribute(dev, attr, buf, count, + LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_SETWINKEYSTATUS, true, + 1); +} + +static DEVICE_ATTR_RW(winkey); + +// on newer models 
the touchpad feature in ideapad does not work anymore, so +// we need this +static ssize_t touchpad_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return show_simple_wmi_attribute(dev, attr, buf, + LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_GETTPSTATUS, true, 1); +} + +static ssize_t touchpad_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + return store_simple_wmi_attribute(dev, attr, buf, count, + LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_SETTPSTATUS, true, 1); +} + +static DEVICE_ATTR_RW(touchpad); + +static ssize_t gsync_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return show_simple_wmi_attribute(dev, attr, buf, + LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_GETGSYNCSTATUS, true, 1); +} + +static ssize_t gsync_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + return store_simple_wmi_attribute(dev, attr, buf, count, + LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_SETGSYNCSTATUS, true, + 1); +} + +static DEVICE_ATTR_RW(gsync); + +static ssize_t powerchargemode_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_simple_wmi_attribute(dev, attr, buf, + LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_GETPOWERCHARGEMODE, + false, 1); +} +static DEVICE_ATTR_RO(powerchargemode); + +static ssize_t overdrive_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return show_simple_wmi_attribute(dev, attr, buf, + LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_GETODSTATUS, false, 1); +} + +static ssize_t overdrive_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + return store_simple_wmi_attribute(dev, attr, buf, count, + LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_SETODSTATUS, false, 1); +} + +static DEVICE_ATTR_RW(overdrive); + +static ssize_t thermalmode_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_simple_wmi_attribute(dev, attr, buf, + LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_GETTHERMALMODE, false, + 1); +} +static DEVICE_ATTR_RO(thermalmode); + +// TOOD: probably remove again because provided by other means; only useful for overclocking +static ssize_t cpumaxfrequency_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_simple_wmi_attribute(dev, attr, buf, + LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_GETCPUMAXFREQUENCY, + false, 1); +} +static DEVICE_ATTR_RO(cpumaxfrequency); + +static ssize_t isacfitforoc_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_simple_wmi_attribute(dev, attr, buf, + LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_ISACFITFOROC, false, 1); +} +static DEVICE_ATTR_RO(isacfitforoc); + +static ssize_t igpumode_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return show_simple_wmi_attribute(dev, attr, buf, + LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_GETIGPUMODESTATUS, false, + 1); +} + +static ssize_t igpumode_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + return store_simple_wmi_attribute(dev, attr, buf, count, + LEGION_WMI_GAMEZONE_GUID, 0, + WMI_METHOD_ID_SETIGPUMODESTATUS, + false, 1); +} + +static DEVICE_ATTR_RW(igpumode); + +static ssize_t cpu_oc_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return show_simple_wmi_attribute_from_buffer( + dev, attr, buf, WMI_GUID_LENOVO_CPU_METHOD, 0, + 
WMI_METHOD_ID_CPU_GET_OC_STATUS, 16, 0, 1); +} + +static ssize_t cpu_oc_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + return store_simple_wmi_attribute(dev, attr, buf, count, + WMI_GUID_LENOVO_CPU_METHOD, 0, + WMI_METHOD_ID_CPU_SET_OC_STATUS, + false, 1); +} + +static DEVICE_ATTR_RW(cpu_oc); + +static ssize_t cpu_shortterm_powerlimit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return show_simple_wmi_attribute_from_buffer( + dev, attr, buf, WMI_GUID_LENOVO_CPU_METHOD, 0, + WMI_METHOD_ID_CPU_GET_SHORTTERM_POWERLIMIT, 16, 0, 1); +} + +static ssize_t cpu_shortterm_powerlimit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return store_simple_wmi_attribute( + dev, attr, buf, count, WMI_GUID_LENOVO_CPU_METHOD, 0, + WMI_METHOD_ID_CPU_SET_SHORTTERM_POWERLIMIT, false, 1); +} + +static DEVICE_ATTR_RW(cpu_shortterm_powerlimit); + +static ssize_t cpu_longterm_powerlimit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return show_simple_wmi_attribute_from_buffer( + dev, attr, buf, WMI_GUID_LENOVO_CPU_METHOD, 0, + WMI_METHOD_ID_CPU_GET_LONGTERM_POWERLIMIT, 16, 0, 1); +} + +static ssize_t cpu_longterm_powerlimit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return store_simple_wmi_attribute( + dev, attr, buf, count, WMI_GUID_LENOVO_CPU_METHOD, 0, + WMI_METHOD_ID_CPU_SET_LONGTERM_POWERLIMIT, false, 1); +} + +static DEVICE_ATTR_RW(cpu_longterm_powerlimit); + +static ssize_t cpu_default_powerlimit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return show_simple_wmi_attribute( + dev, attr, buf, WMI_GUID_LENOVO_CPU_METHOD, 0, + WMI_METHOD_ID_CPU_GET_DEFAULT_POWERLIMIT, false, 1); +} + +static DEVICE_ATTR_RO(cpu_default_powerlimit); + +static ssize_t cpu_peak_powerlimit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return show_simple_wmi_attribute(dev, attr, buf, + WMI_GUID_LENOVO_GPU_METHOD, 0, + WMI_METHOD_ID_CPU_GET_PEAK_POWERLIMIT, + false, 1); +} + +static ssize_t cpu_peak_powerlimit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return store_simple_wmi_attribute(dev, attr, buf, count, + WMI_GUID_LENOVO_GPU_METHOD, 0, + WMI_METHOD_ID_CPU_SET_PEAK_POWERLIMIT, + false, 1); +} + +static DEVICE_ATTR_RW(cpu_peak_powerlimit); + +static ssize_t cpu_apu_sppt_powerlimit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return show_simple_wmi_attribute( + dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0, + WMI_METHOD_ID_CPU_GET_APU_SPPT_POWERLIMIT, false, 1); +} + +static ssize_t cpu_apu_sppt_powerlimit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return store_simple_wmi_attribute( + dev, attr, buf, count, WMI_GUID_LENOVO_GPU_METHOD, 0, + WMI_METHOD_ID_CPU_SET_APU_SPPT_POWERLIMIT, false, 1); +} + +static DEVICE_ATTR_RW(cpu_apu_sppt_powerlimit); + +static ssize_t cpu_cross_loading_powerlimit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return show_simple_wmi_attribute( + dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0, + WMI_METHOD_ID_CPU_GET_CROSS_LOADING_POWERLIMIT, false, 1); +} + +static ssize_t cpu_cross_loading_powerlimit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return store_simple_wmi_attribute( + dev, attr, buf, count, 
WMI_GUID_LENOVO_GPU_METHOD, 0, + WMI_METHOD_ID_CPU_SET_CROSS_LOADING_POWERLIMIT, false, 1); +} + +static DEVICE_ATTR_RW(cpu_cross_loading_powerlimit); + +static ssize_t gpu_oc_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return show_simple_wmi_attribute(dev, attr, buf, + WMI_GUID_LENOVO_GPU_METHOD, 0, + WMI_METHOD_ID_GPU_GET_OC_STATUS, false, + 1); +} + +static ssize_t gpu_oc_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + return store_simple_wmi_attribute(dev, attr, buf, count, + WMI_GUID_LENOVO_GPU_METHOD, 0, + WMI_METHOD_ID_GPU_SET_OC_STATUS, + false, 1); +} + +static DEVICE_ATTR_RW(gpu_oc); + +static ssize_t gpu_ppab_powerlimit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return show_simple_wmi_attribute_from_buffer( + dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0, + WMI_METHOD_ID_GPU_GET_PPAB_POWERLIMIT, 16, 0, 1); +} + +static ssize_t gpu_ppab_powerlimit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return store_simple_wmi_attribute(dev, attr, buf, count, + WMI_GUID_LENOVO_GPU_METHOD, 0, + WMI_METHOD_ID_GPU_SET_PPAB_POWERLIMIT, + false, 1); +} + +static DEVICE_ATTR_RW(gpu_ppab_powerlimit); + +static ssize_t gpu_ctgp_powerlimit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return show_simple_wmi_attribute_from_buffer( + dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0, + WMI_METHOD_ID_GPU_GET_CTGP_POWERLIMIT, 16, 0, 1); +} + +static ssize_t gpu_ctgp_powerlimit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return store_simple_wmi_attribute(dev, attr, buf, count, + WMI_GUID_LENOVO_GPU_METHOD, 0, + WMI_METHOD_ID_GPU_SET_CTGP_POWERLIMIT, + false, 1); +} + +static DEVICE_ATTR_RW(gpu_ctgp_powerlimit); + +static ssize_t gpu_ctgp2_powerlimit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return show_simple_wmi_attribute_from_buffer( + dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0, + WMI_METHOD_ID_GPU_GET_CTGP_POWERLIMIT, 16, 0x0C, 1); +} + +static DEVICE_ATTR_RO(gpu_ctgp2_powerlimit); + +// TOOD: probably remove again because provided by other means; only useful for overclocking +static ssize_t +gpu_default_ppab_ctrgp_powerlimit_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_simple_wmi_attribute( + dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0, + WMI_METHOD_ID_GPU_GET_DEFAULT_PPAB_CTGP_POWERLIMIT, false, 1); +} +static DEVICE_ATTR_RO(gpu_default_ppab_ctrgp_powerlimit); + +static ssize_t gpu_temperature_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return show_simple_wmi_attribute( + dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0, + WMI_METHOD_ID_GPU_GET_TEMPERATURE_LIMIT, false, 1); +} + +static ssize_t gpu_temperature_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return store_simple_wmi_attribute( + dev, attr, buf, count, WMI_GUID_LENOVO_GPU_METHOD, 0, + WMI_METHOD_ID_GPU_SET_TEMPERATURE_LIMIT, false, 1); +} + +static DEVICE_ATTR_RW(gpu_temperature_limit); + +// TOOD: probably remove again because provided by other means; only useful for overclocking +static ssize_t gpu_boost_clock_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_simple_wmi_attribute(dev, attr, buf, + WMI_GUID_LENOVO_GPU_METHOD, 0, + WMI_METHOD_ID_GPU_GET_BOOST_CLOCK, + false, 1); +} +static 
DEVICE_ATTR_RO(gpu_boost_clock); + +static ssize_t fan_fullspeed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + bool state = false; + int err; + struct legion_private *priv = dev_get_drvdata(dev); + + mutex_lock(&priv->fancurve_mutex); + err = read_fanfullspeed(priv, &state); + mutex_unlock(&priv->fancurve_mutex); + if (err) + return -EINVAL; + + return sysfs_emit(buf, "%d\n", state); +} + +static ssize_t fan_fullspeed_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct legion_private *priv = dev_get_drvdata(dev); + int state; + int err; + + err = kstrtouint(buf, 0, &state); + if (err) + return err; + + mutex_lock(&priv->fancurve_mutex); + err = write_fanfullspeed(priv, state); + mutex_unlock(&priv->fancurve_mutex); + if (err) + return -EINVAL; + + return count; +} + +static DEVICE_ATTR_RW(fan_fullspeed); + +static ssize_t fan_maxspeed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_simple_wmi_attribute(dev, attr, buf, + WMI_GUID_LENOVO_FAN_METHOD, 0, + WMI_METHOD_ID_FAN_GET_MAXSPEED, false, + 1); +} + +static ssize_t fan_maxspeed_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return store_simple_wmi_attribute(dev, attr, buf, count, + WMI_GUID_LENOVO_FAN_METHOD, 0, + WMI_METHOD_ID_FAN_SET_MAXSPEED, false, + 1); +} + +static DEVICE_ATTR_RW(fan_maxspeed); + +static ssize_t powermode_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct legion_private *priv = dev_get_drvdata(dev); + int power_mode; + + mutex_lock(&priv->fancurve_mutex); + read_powermode(priv, &power_mode); + mutex_unlock(&priv->fancurve_mutex); + return sysfs_emit(buf, "%d\n", power_mode); +} + +static void legion_platform_profile_notify(void); + +static ssize_t powermode_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct legion_private *priv = dev_get_drvdata(dev); + int powermode; + int err; + + err = kstrtouint(buf, 0, &powermode); + if (err) + return err; + + mutex_lock(&priv->fancurve_mutex); + err = write_powermode(priv, powermode); + mutex_unlock(&priv->fancurve_mutex); + if (err) + return -EINVAL; + + // TODO: better? 
+	// We have to wait a bit until the change has reached the hardware, so
+	// that a readback done after notifying returns the new value;
+	// otherwise the notified reader would still read the old value.
+	msleep(500);
+	legion_platform_profile_notify();
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(powermode);
+
+static struct attribute *legion_sysfs_attributes[] = {
+	&dev_attr_powermode.attr,
+	&dev_attr_lockfancontroller.attr,
+	&dev_attr_rapidcharge.attr,
+	&dev_attr_winkey.attr,
+	&dev_attr_touchpad.attr,
+	&dev_attr_gsync.attr,
+	&dev_attr_powerchargemode.attr,
+	&dev_attr_overdrive.attr,
+	&dev_attr_cpumaxfrequency.attr,
+	&dev_attr_isacfitforoc.attr,
+	&dev_attr_cpu_oc.attr,
+	&dev_attr_cpu_shortterm_powerlimit.attr,
+	&dev_attr_cpu_longterm_powerlimit.attr,
+	&dev_attr_cpu_apu_sppt_powerlimit.attr,
+	&dev_attr_cpu_default_powerlimit.attr,
+	&dev_attr_cpu_peak_powerlimit.attr,
+	&dev_attr_cpu_cross_loading_powerlimit.attr,
+	&dev_attr_gpu_oc.attr,
+	&dev_attr_gpu_ppab_powerlimit.attr,
+	&dev_attr_gpu_ctgp_powerlimit.attr,
+	&dev_attr_gpu_ctgp2_powerlimit.attr,
+	&dev_attr_gpu_default_ppab_ctrgp_powerlimit.attr,
+	&dev_attr_gpu_temperature_limit.attr,
+	&dev_attr_gpu_boost_clock.attr,
+	&dev_attr_fan_fullspeed.attr,
+	&dev_attr_fan_maxspeed.attr,
+	&dev_attr_thermalmode.attr,
+	&dev_attr_issupportcpuoc.attr,
+	&dev_attr_issupportgpuoc.attr,
+	&dev_attr_aslcodeversion.attr,
+	&dev_attr_igpumode.attr,
+	NULL
+};
+
+static const struct attribute_group legion_attribute_group = {
+	.attrs = legion_sysfs_attributes
+};
+
+static int legion_sysfs_init(struct legion_private *priv)
+{
+	return device_add_group(&priv->platform_device->dev,
+				&legion_attribute_group);
+}
+
+static void legion_sysfs_exit(struct legion_private *priv)
+{
+	pr_info("Unloading legion sysfs\n");
+	device_remove_group(&priv->platform_device->dev,
+			    &legion_attribute_group);
+	pr_info("Unloading legion sysfs done\n");
+}
+
+/* ============================= */
+/* WMI + ACPI */
+/* ============================ */
+// heavily based on ideapad_laptop.c
+
+// TODO: proper names if meaning of all events is clear
+enum LEGION_WMI_EVENT {
+	LEGION_WMI_EVENT_GAMEZONE = 1,
+	LEGION_EVENT_A,
+	LEGION_EVENT_B,
+	LEGION_EVENT_C,
+	LEGION_EVENT_D,
+	LEGION_EVENT_E,
+	LEGION_EVENT_F,
+	LEGION_EVENT_G
+};
+
+struct legion_wmi_private {
+	enum LEGION_WMI_EVENT event;
+};
+
+//static void legion_wmi_notify2(u32 value, void *context)
+// {
+//	pr_info("WMI notify\n" );
+// }
+
+static void legion_wmi_notify(struct wmi_device *wdev, union acpi_object *data)
+{
+	struct legion_wmi_private *wpriv;
+	struct legion_private *priv;
+
+	mutex_lock(&legion_shared_mutex);
+	priv = legion_shared;
+	if (!priv || !priv->loaded) {
+		pr_info("Received WMI event while not initialized!\n");
+		goto unlock;
+	}
+
+	wpriv = dev_get_drvdata(&wdev->dev);
+	switch (wpriv->event) {
+	case LEGION_EVENT_A:
+		pr_info("Fan event: legion type: %d; acpi type: %d (%d=integer)\n",
+			wpriv->event, data->type, ACPI_TYPE_INTEGER);
+		// TODO: here it is too early (first unlock the mutex, then wait a bit)
+		//legion_platform_profile_notify();
+		break;
+	default:
+		pr_info("Event: legion type: %d; acpi type: %d (%d=integer)\n",
+			wpriv->event, data->type, ACPI_TYPE_INTEGER);
+		break;
+	}
+
+unlock:
+	mutex_unlock(&legion_shared_mutex);
+	// TODO: fix that:
+ // problem: we get an event just before the powermode change (from the key?), + // so if we notify too early, it will read the old power mode/platform profile + msleep(500); + legion_platform_profile_notify(); +} + +static int legion_wmi_probe(struct wmi_device *wdev, const void *context) +{ + struct legion_wmi_private *wpriv; + + wpriv = devm_kzalloc(&wdev->dev, sizeof(*wpriv), GFP_KERNEL); + if (!wpriv) + return -ENOMEM; + + *wpriv = *(const struct legion_wmi_private *)context; + + dev_set_drvdata(&wdev->dev, wpriv); + dev_info(&wdev->dev, "Register after probing for WMI.\n"); + return 0; +} + +static const struct legion_wmi_private legion_wmi_context_gamezone = { + .event = LEGION_WMI_EVENT_GAMEZONE +}; +static const struct legion_wmi_private legion_wmi_context_a = { + .event = LEGION_EVENT_A +}; +static const struct legion_wmi_private legion_wmi_context_b = { + .event = LEGION_EVENT_B +}; +static const struct legion_wmi_private legion_wmi_context_c = { + .event = LEGION_EVENT_C +}; +static const struct legion_wmi_private legion_wmi_context_d = { + .event = LEGION_EVENT_D +}; +static const struct legion_wmi_private legion_wmi_context_e = { + .event = LEGION_EVENT_E +}; +static const struct legion_wmi_private legion_wmi_context_f = { + .event = LEGION_EVENT_F +}; + +#define LEGION_WMI_GUID_FAN_EVENT "D320289E-8FEA-41E0-86F9-611D83151B5F" +#define LEGION_WMI_GUID_FAN2_EVENT "bc72a435-e8c1-4275-b3e2-d8b8074aba59" +#define LEGION_WMI_GUID_GAMEZONE_KEY_EVENT \ + "10afc6d9-ea8b-4590-a2e7-1cd3c84bb4b1" +#define LEGION_WMI_GUID_GAMEZONE_GPU_EVENT \ + "bfd42481-aee3-4502-a107-afb68425c5f8" +#define LEGION_WMI_GUID_GAMEZONE_OC_EVENT "d062906b-12d4-4510-999d-4831ee80e985" +#define LEGION_WMI_GUID_GAMEZONE_TEMP_EVENT \ + "bfd42481-aee3-4501-a107-afb68425c5f8" +//#define LEGION_WMI_GUID_GAMEZONE_DATA_EVENT "887b54e3-dddc-4b2c-8b88-68a26a8835d0" + +static const struct wmi_device_id legion_wmi_ids[] = { + { LEGION_WMI_GAMEZONE_GUID, &legion_wmi_context_gamezone }, + { LEGION_WMI_GUID_FAN_EVENT, &legion_wmi_context_a }, + { LEGION_WMI_GUID_FAN2_EVENT, &legion_wmi_context_b }, + { LEGION_WMI_GUID_GAMEZONE_KEY_EVENT, &legion_wmi_context_c }, + { LEGION_WMI_GUID_GAMEZONE_GPU_EVENT, &legion_wmi_context_d }, + { LEGION_WMI_GUID_GAMEZONE_OC_EVENT, &legion_wmi_context_e }, + { LEGION_WMI_GUID_GAMEZONE_TEMP_EVENT, &legion_wmi_context_f }, + { "8FC0DE0C-B4E4-43FD-B0F3-8871711C1294", + &legion_wmi_context_gamezone }, /* Legion 5 */ + {}, +}; +MODULE_DEVICE_TABLE(wmi, legion_wmi_ids); + +static struct wmi_driver legion_wmi_driver = { + .driver = { + .name = "legion_wmi", + }, + .id_table = legion_wmi_ids, + .probe = legion_wmi_probe, + .notify = legion_wmi_notify, +}; + +//acpi_status status = wmi_install_notify_handler(LEGION_WMI_GAMEZONE_GUID, +// legion_wmi_notify2, NULL); +//if (ACPI_FAILURE(status)) { +// return -ENODEV; +//} +//return 0; + +static int legion_wmi_init(void) +{ + return wmi_driver_register(&legion_wmi_driver); +} + +static void legion_wmi_exit(void) +{ + // TODO: remove this + pr_info("Unloading legion WMI\n"); + + //wmi_remove_notify_handler(LEGION_WMI_GAMEZONE_GUID); + wmi_driver_unregister(&legion_wmi_driver); + pr_info("Unloading legion WMI done\n"); +} + +/* ============================= */ +/* Platform profile */ +/* ============================ */ + +static void legion_platform_profile_notify(void) +{ + if (!enable_platformprofile) + pr_info("Skipping platform_profile_notify because enable_platformprofile is false\n"); + + platform_profile_notify(); +} + +static int 
legion_platform_profile_get(struct platform_profile_handler *pprof, + enum platform_profile_option *profile) +{ + int powermode; + struct legion_private *priv; + + priv = container_of(pprof, struct legion_private, + platform_profile_handler); + read_powermode(priv, &powermode); + + switch (powermode) { + case LEGION_WMI_POWERMODE_BALANCED: + *profile = PLATFORM_PROFILE_BALANCED; + break; + case LEGION_WMI_POWERMODE_PERFORMANCE: + *profile = PLATFORM_PROFILE_PERFORMANCE; + break; + case LEGION_WMI_POWERMODE_QUIET: + *profile = PLATFORM_PROFILE_QUIET; + break; + case LEGION_WMI_POWERMODE_CUSTOM: + *profile = PLATFORM_PROFILE_BALANCED_PERFORMANCE; + break; + default: + return -EINVAL; + } + return 0; +} + +static int legion_platform_profile_set(struct platform_profile_handler *pprof, + enum platform_profile_option profile) +{ + int powermode; + struct legion_private *priv; + + priv = container_of(pprof, struct legion_private, + platform_profile_handler); + + switch (profile) { + case PLATFORM_PROFILE_BALANCED: + powermode = LEGION_WMI_POWERMODE_BALANCED; + break; + case PLATFORM_PROFILE_PERFORMANCE: + powermode = LEGION_WMI_POWERMODE_PERFORMANCE; + break; + case PLATFORM_PROFILE_QUIET: + powermode = LEGION_WMI_POWERMODE_QUIET; + break; + case PLATFORM_PROFILE_BALANCED_PERFORMANCE: + powermode = LEGION_WMI_POWERMODE_CUSTOM; + break; + default: + return -EOPNOTSUPP; + } + + return write_powermode(priv, powermode); +} + +static int legion_platform_profile_init(struct legion_private *priv) +{ + int err; + + if (!enable_platformprofile) { + pr_info("Skipping creating platform profile support because enable_platformprofile is false\n"); + return 0; + } + + priv->platform_profile_handler.profile_get = + legion_platform_profile_get; + priv->platform_profile_handler.profile_set = + legion_platform_profile_set; + + set_bit(PLATFORM_PROFILE_QUIET, priv->platform_profile_handler.choices); + set_bit(PLATFORM_PROFILE_BALANCED, + priv->platform_profile_handler.choices); + set_bit(PLATFORM_PROFILE_PERFORMANCE, + priv->platform_profile_handler.choices); + if (priv->conf->has_custom_powermode && + priv->conf->access_method_powermode == ACCESS_METHOD_WMI) { + set_bit(PLATFORM_PROFILE_BALANCED_PERFORMANCE, + priv->platform_profile_handler.choices); + } + + err = platform_profile_register(&priv->platform_profile_handler); + if (err) + return err; + + return 0; +} + +static void legion_platform_profile_exit(struct legion_private *priv) +{ + if (!enable_platformprofile) { + pr_info("Skipping unloading platform profile support because enable_platformprofile is false\n"); + return; + } + pr_info("Unloading legion platform profile\n"); + platform_profile_remove(); + pr_info("Unloading legion platform profile done\n"); +} + +/* ============================= */ +/* hwom interface */ +/* ============================ */ + +// hw-mon interface + +// todo: register_group or register_info? + +// TODO: use one common function (like here) or one function per attribute? 
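+// Overview of the hwmon channels exposed below (derived from the handlers
+// that follow): temp1 = CPU, temp2 = GPU, temp3 = IC sensor; fan1/fan2 are
+// the measured fan speeds and fan1_target/fan2_target the EC's target RPM
+// values. Temperatures are reported in millidegrees Celsius, as hwmon
+// expects, by scaling the raw Celsius readings by 1000.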
+static ssize_t sensor_label_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int sensor_id = (to_sensor_dev_attr(attr))->index; + const char *label; + + switch (sensor_id) { + case SENSOR_CPU_TEMP_ID: + label = "CPU Temperature\n"; + break; + case SENSOR_GPU_TEMP_ID: + label = "GPU Temperature\n"; + break; + case SENSOR_IC_TEMP_ID: + label = "IC Temperature\n"; + break; + case SENSOR_FAN1_RPM_ID: + label = "Fan 1\n"; + break; + case SENSOR_FAN2_RPM_ID: + label = "Fan 2\n"; + break; + case SENSOR_FAN1_TARGET_RPM_ID: + label = "Fan 1 Target\n"; + break; + case SENSOR_FAN2_TARGET_RPM_ID: + label = "Fan 2 Target\n"; + break; + default: + return -EOPNOTSUPP; + } + + return sprintf(buf, label); +} + +// TODO: use one common function (like here) or one function per attribute? +static ssize_t sensor_show(struct device *dev, struct device_attribute *devattr, + char *buf) +{ + struct legion_private *priv = dev_get_drvdata(dev); + int sensor_id = (to_sensor_dev_attr(devattr))->index; + struct sensor_values values; + int outval; + int err = -EIO; + + switch (sensor_id) { + case SENSOR_CPU_TEMP_ID: + err = read_temperature(priv, 0, &outval); + outval *= 1000; + break; + case SENSOR_GPU_TEMP_ID: + err = read_temperature(priv, 1, &outval); + outval *= 1000; + break; + case SENSOR_IC_TEMP_ID: + ec_read_sensor_values(&priv->ecram, priv->conf, &values); + outval = 1000 * values.ic_temp_celsius; + err = 0; + break; + case SENSOR_FAN1_RPM_ID: + err = read_fanspeed(priv, 0, &outval); + break; + case SENSOR_FAN2_RPM_ID: + err = read_fanspeed(priv, 1, &outval); + break; + case SENSOR_FAN1_TARGET_RPM_ID: + ec_read_sensor_values(&priv->ecram, priv->conf, &values); + outval = values.fan1_target_rpm; + err = 0; + break; + case SENSOR_FAN2_TARGET_RPM_ID: + ec_read_sensor_values(&priv->ecram, priv->conf, &values); + outval = values.fan2_target_rpm; + err = 0; + break; + default: + pr_info("Error reading sensor value with id %d\n", sensor_id); + return -EOPNOTSUPP; + } + if (err) + return err; + + return sprintf(buf, "%d\n", outval); +} + +static SENSOR_DEVICE_ATTR_RO(temp1_input, sensor, SENSOR_CPU_TEMP_ID); +static SENSOR_DEVICE_ATTR_RO(temp1_label, sensor_label, SENSOR_CPU_TEMP_ID); +static SENSOR_DEVICE_ATTR_RO(temp2_input, sensor, SENSOR_GPU_TEMP_ID); +static SENSOR_DEVICE_ATTR_RO(temp2_label, sensor_label, SENSOR_GPU_TEMP_ID); +static SENSOR_DEVICE_ATTR_RO(temp3_input, sensor, SENSOR_IC_TEMP_ID); +static SENSOR_DEVICE_ATTR_RO(temp3_label, sensor_label, SENSOR_IC_TEMP_ID); +static SENSOR_DEVICE_ATTR_RO(fan1_input, sensor, SENSOR_FAN1_RPM_ID); +static SENSOR_DEVICE_ATTR_RO(fan1_label, sensor_label, SENSOR_FAN1_RPM_ID); +static SENSOR_DEVICE_ATTR_RO(fan2_input, sensor, SENSOR_FAN2_RPM_ID); +static SENSOR_DEVICE_ATTR_RO(fan2_label, sensor_label, SENSOR_FAN2_RPM_ID); +static SENSOR_DEVICE_ATTR_RO(fan1_target, sensor, SENSOR_FAN1_TARGET_RPM_ID); +static SENSOR_DEVICE_ATTR_RO(fan2_target, sensor, SENSOR_FAN2_TARGET_RPM_ID); + +static struct attribute *sensor_hwmon_attributes[] = { + &sensor_dev_attr_temp1_input.dev_attr.attr, + &sensor_dev_attr_temp1_label.dev_attr.attr, + &sensor_dev_attr_temp2_input.dev_attr.attr, + &sensor_dev_attr_temp2_label.dev_attr.attr, + &sensor_dev_attr_temp3_input.dev_attr.attr, + &sensor_dev_attr_temp3_label.dev_attr.attr, + &sensor_dev_attr_fan1_input.dev_attr.attr, + &sensor_dev_attr_fan1_label.dev_attr.attr, + &sensor_dev_attr_fan2_input.dev_attr.attr, + &sensor_dev_attr_fan2_label.dev_attr.attr, + &sensor_dev_attr_fan1_target.dev_attr.attr, + 
&sensor_dev_attr_fan2_target.dev_attr.attr, + NULL +}; + +static ssize_t autopoint_show(struct device *dev, + struct device_attribute *devattr, char *buf) +{ + struct fancurve fancurve; + int err; + int value; + struct legion_private *priv = dev_get_drvdata(dev); + int fancurve_attr_id = to_sensor_dev_attr_2(devattr)->nr; + int point_id = to_sensor_dev_attr_2(devattr)->index; + + mutex_lock(&priv->fancurve_mutex); + err = read_fancurve(priv, &fancurve); + mutex_unlock(&priv->fancurve_mutex); + + if (err) { + pr_info("Failed to read fancurve\n"); + return -EOPNOTSUPP; + } + if (!(point_id >= 0 && point_id < MAXFANCURVESIZE)) { + pr_info("Failed to read fancurve due to wrong point id: %d\n", + point_id); + return -EOPNOTSUPP; + } + + switch (fancurve_attr_id) { + case FANCURVE_ATTR_PWM1: + value = fancurve.points[point_id].rpm1_raw * 100; + break; + case FANCURVE_ATTR_PWM2: + value = fancurve.points[point_id].rpm2_raw * 100; + break; + case FANCURVE_ATTR_CPU_TEMP: + value = fancurve.points[point_id].cpu_max_temp_celsius; + break; + case FANCURVE_ATTR_CPU_HYST: + value = fancurve.points[point_id].cpu_min_temp_celsius; + break; + case FANCURVE_ATTR_GPU_TEMP: + value = fancurve.points[point_id].gpu_max_temp_celsius; + break; + case FANCURVE_ATTR_GPU_HYST: + value = fancurve.points[point_id].gpu_min_temp_celsius; + break; + case FANCURVE_ATTR_IC_TEMP: + value = fancurve.points[point_id].ic_max_temp_celsius; + break; + case FANCURVE_ATTR_IC_HYST: + value = fancurve.points[point_id].ic_min_temp_celsius; + break; + case FANCURVE_ATTR_ACCEL: + value = fancurve.points[point_id].accel; + break; + case FANCURVE_ATTR_DECEL: + value = fancurve.points[point_id].decel; + break; + case FANCURVE_SIZE: + value = fancurve.size; + break; + default: + pr_info("Failed to read fancurve due to wrong attribute id: %d\n", + fancurve_attr_id); + return -EOPNOTSUPP; + } + + return sprintf(buf, "%d\n", value); +} + +static ssize_t autopoint_store(struct device *dev, + struct device_attribute *devattr, + const char *buf, size_t count) +{ + struct fancurve fancurve; + int err; + int value; + bool valid; + struct legion_private *priv = dev_get_drvdata(dev); + int fancurve_attr_id = to_sensor_dev_attr_2(devattr)->nr; + int point_id = to_sensor_dev_attr_2(devattr)->index; + bool write_fancurve_size = false; + + if (!(point_id >= 0 && point_id < MAXFANCURVESIZE)) { + pr_info("Failed to read fancurve due to wrong point id: %d\n", + point_id); + err = -EOPNOTSUPP; + goto error; + } + + err = kstrtoint(buf, 0, &value); + if (err) { + pr_info("Parsing hwmon store failed: error: %d; point_id: %d; fancurve_attr_id: %d\\n", + err, point_id, fancurve_attr_id); + goto error; + } + + mutex_lock(&priv->fancurve_mutex); + err = read_fancurve(priv, &fancurve); + + if (err) { + pr_info("Failed to read fancurve\n"); + err = -EOPNOTSUPP; + goto error_mutex; + } + + switch (fancurve_attr_id) { + case FANCURVE_ATTR_PWM1: + valid = fancurve_set_rpm1(&fancurve, point_id, value); + break; + case FANCURVE_ATTR_PWM2: + valid = fancurve_set_rpm2(&fancurve, point_id, value); + break; + case FANCURVE_ATTR_CPU_TEMP: + valid = fancurve_set_cpu_temp_max(&fancurve, point_id, value); + break; + case FANCURVE_ATTR_CPU_HYST: + valid = fancurve_set_cpu_temp_min(&fancurve, point_id, value); + break; + case FANCURVE_ATTR_GPU_TEMP: + valid = fancurve_set_gpu_temp_max(&fancurve, point_id, value); + break; + case FANCURVE_ATTR_GPU_HYST: + valid = fancurve_set_gpu_temp_min(&fancurve, point_id, value); + break; + case FANCURVE_ATTR_IC_TEMP: + valid = 
fancurve_set_ic_temp_max(&fancurve, point_id, value); + break; + case FANCURVE_ATTR_IC_HYST: + valid = fancurve_set_ic_temp_min(&fancurve, point_id, value); + break; + case FANCURVE_ATTR_ACCEL: + valid = fancurve_set_accel(&fancurve, point_id, value); + break; + case FANCURVE_ATTR_DECEL: + valid = fancurve_set_decel(&fancurve, point_id, value); + break; + case FANCURVE_SIZE: + valid = fancurve_set_size(&fancurve, value, true); + write_fancurve_size = true; + break; + default: + pr_info("Failed to write fancurve due to wrong attribute id: %d\n", + fancurve_attr_id); + err = -EOPNOTSUPP; + goto error_mutex; + } + + if (!valid) { + pr_info("Ignoring invalid fancurve value %d for attribute %d at point %d\n", + value, fancurve_attr_id, point_id); + err = -EOPNOTSUPP; + goto error_mutex; + } + + err = write_fancurve(priv, &fancurve, write_fancurve_size); + if (err) { + pr_info("Failed to write fancurve for accessing hwmon at point_id: %d\n", + point_id); + err = -EOPNOTSUPP; + goto error_mutex; + } + + mutex_unlock(&priv->fancurve_mutex); + return count; + +error_mutex: + mutex_unlock(&priv->fancurve_mutex); +error: + return count; +} + +// rpm1 +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_pwm, autopoint, + FANCURVE_ATTR_PWM1, 0); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_pwm, autopoint, + FANCURVE_ATTR_PWM1, 1); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_pwm, autopoint, + FANCURVE_ATTR_PWM1, 2); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_pwm, autopoint, + FANCURVE_ATTR_PWM1, 3); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_pwm, autopoint, + FANCURVE_ATTR_PWM1, 4); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_pwm, autopoint, + FANCURVE_ATTR_PWM1, 5); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_pwm, autopoint, + FANCURVE_ATTR_PWM1, 6); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_pwm, autopoint, + FANCURVE_ATTR_PWM1, 7); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_pwm, autopoint, + FANCURVE_ATTR_PWM1, 8); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_pwm, autopoint, + FANCURVE_ATTR_PWM1, 9); +// rpm2 +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point1_pwm, autopoint, + FANCURVE_ATTR_PWM2, 0); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point2_pwm, autopoint, + FANCURVE_ATTR_PWM2, 1); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point3_pwm, autopoint, + FANCURVE_ATTR_PWM2, 2); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point4_pwm, autopoint, + FANCURVE_ATTR_PWM2, 3); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point5_pwm, autopoint, + FANCURVE_ATTR_PWM2, 4); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point6_pwm, autopoint, + FANCURVE_ATTR_PWM2, 5); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point7_pwm, autopoint, + FANCURVE_ATTR_PWM2, 6); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point8_pwm, autopoint, + FANCURVE_ATTR_PWM2, 7); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point9_pwm, autopoint, + FANCURVE_ATTR_PWM2, 8); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point10_pwm, autopoint, + FANCURVE_ATTR_PWM2, 9); +// CPU temp +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_temp, autopoint, + FANCURVE_ATTR_CPU_TEMP, 0); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_temp, autopoint, + FANCURVE_ATTR_CPU_TEMP, 1); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_temp, autopoint, + FANCURVE_ATTR_CPU_TEMP, 2); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_temp, autopoint, + FANCURVE_ATTR_CPU_TEMP, 3); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_temp, autopoint, + FANCURVE_ATTR_CPU_TEMP, 4); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_temp, autopoint, + 
FANCURVE_ATTR_CPU_TEMP, 5); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_temp, autopoint, + FANCURVE_ATTR_CPU_TEMP, 6); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_temp, autopoint, + FANCURVE_ATTR_CPU_TEMP, 7); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_temp, autopoint, + FANCURVE_ATTR_CPU_TEMP, 8); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_temp, autopoint, + FANCURVE_ATTR_CPU_TEMP, 9); +// CPU temp hyst +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_temp_hyst, autopoint, + FANCURVE_ATTR_CPU_HYST, 0); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_temp_hyst, autopoint, + FANCURVE_ATTR_CPU_HYST, 1); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_temp_hyst, autopoint, + FANCURVE_ATTR_CPU_HYST, 2); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_temp_hyst, autopoint, + FANCURVE_ATTR_CPU_HYST, 3); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_temp_hyst, autopoint, + FANCURVE_ATTR_CPU_HYST, 4); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_temp_hyst, autopoint, + FANCURVE_ATTR_CPU_HYST, 5); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_temp_hyst, autopoint, + FANCURVE_ATTR_CPU_HYST, 6); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_temp_hyst, autopoint, + FANCURVE_ATTR_CPU_HYST, 7); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_temp_hyst, autopoint, + FANCURVE_ATTR_CPU_HYST, 8); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_temp_hyst, autopoint, + FANCURVE_ATTR_CPU_HYST, 9); +// GPU temp +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point1_temp, autopoint, + FANCURVE_ATTR_GPU_TEMP, 0); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point2_temp, autopoint, + FANCURVE_ATTR_GPU_TEMP, 1); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point3_temp, autopoint, + FANCURVE_ATTR_GPU_TEMP, 2); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point4_temp, autopoint, + FANCURVE_ATTR_GPU_TEMP, 3); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point5_temp, autopoint, + FANCURVE_ATTR_GPU_TEMP, 4); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point6_temp, autopoint, + FANCURVE_ATTR_GPU_TEMP, 5); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point7_temp, autopoint, + FANCURVE_ATTR_GPU_TEMP, 6); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point8_temp, autopoint, + FANCURVE_ATTR_GPU_TEMP, 7); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point9_temp, autopoint, + FANCURVE_ATTR_GPU_TEMP, 8); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point10_temp, autopoint, + FANCURVE_ATTR_GPU_TEMP, 9); +// GPU temp hyst +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point1_temp_hyst, autopoint, + FANCURVE_ATTR_GPU_HYST, 0); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point2_temp_hyst, autopoint, + FANCURVE_ATTR_GPU_HYST, 1); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point3_temp_hyst, autopoint, + FANCURVE_ATTR_GPU_HYST, 2); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point4_temp_hyst, autopoint, + FANCURVE_ATTR_GPU_HYST, 3); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point5_temp_hyst, autopoint, + FANCURVE_ATTR_GPU_HYST, 4); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point6_temp_hyst, autopoint, + FANCURVE_ATTR_GPU_HYST, 5); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point7_temp_hyst, autopoint, + FANCURVE_ATTR_GPU_HYST, 6); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point8_temp_hyst, autopoint, + FANCURVE_ATTR_GPU_HYST, 7); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point9_temp_hyst, autopoint, + FANCURVE_ATTR_GPU_HYST, 8); +static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point10_temp_hyst, autopoint, + FANCURVE_ATTR_GPU_HYST, 9); +// IC temp +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point1_temp, autopoint, + FANCURVE_ATTR_IC_TEMP, 0); 
+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point2_temp, autopoint, + FANCURVE_ATTR_IC_TEMP, 1); +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point3_temp, autopoint, + FANCURVE_ATTR_IC_TEMP, 2); +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point4_temp, autopoint, + FANCURVE_ATTR_IC_TEMP, 3); +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point5_temp, autopoint, + FANCURVE_ATTR_IC_TEMP, 4); +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point6_temp, autopoint, + FANCURVE_ATTR_IC_TEMP, 5); +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point7_temp, autopoint, + FANCURVE_ATTR_IC_TEMP, 6); +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point8_temp, autopoint, + FANCURVE_ATTR_IC_TEMP, 7); +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point9_temp, autopoint, + FANCURVE_ATTR_IC_TEMP, 8); +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point10_temp, autopoint, + FANCURVE_ATTR_IC_TEMP, 9); +// IC temp hyst +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point1_temp_hyst, autopoint, + FANCURVE_ATTR_IC_HYST, 0); +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point2_temp_hyst, autopoint, + FANCURVE_ATTR_IC_HYST, 1); +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point3_temp_hyst, autopoint, + FANCURVE_ATTR_IC_HYST, 2); +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point4_temp_hyst, autopoint, + FANCURVE_ATTR_IC_HYST, 3); +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point5_temp_hyst, autopoint, + FANCURVE_ATTR_IC_HYST, 4); +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point6_temp_hyst, autopoint, + FANCURVE_ATTR_IC_HYST, 5); +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point7_temp_hyst, autopoint, + FANCURVE_ATTR_IC_HYST, 6); +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point8_temp_hyst, autopoint, + FANCURVE_ATTR_IC_HYST, 7); +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point9_temp_hyst, autopoint, + FANCURVE_ATTR_IC_HYST, 8); +static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point10_temp_hyst, autopoint, + FANCURVE_ATTR_IC_HYST, 9); +// accel +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_accel, autopoint, + FANCURVE_ATTR_ACCEL, 0); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_accel, autopoint, + FANCURVE_ATTR_ACCEL, 1); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_accel, autopoint, + FANCURVE_ATTR_ACCEL, 2); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_accel, autopoint, + FANCURVE_ATTR_ACCEL, 3); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_accel, autopoint, + FANCURVE_ATTR_ACCEL, 4); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_accel, autopoint, + FANCURVE_ATTR_ACCEL, 5); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_accel, autopoint, + FANCURVE_ATTR_ACCEL, 6); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_accel, autopoint, + FANCURVE_ATTR_ACCEL, 7); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_accel, autopoint, + FANCURVE_ATTR_ACCEL, 8); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_accel, autopoint, + FANCURVE_ATTR_ACCEL, 9); +// decel +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_decel, autopoint, + FANCURVE_ATTR_DECEL, 0); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_decel, autopoint, + FANCURVE_ATTR_DECEL, 1); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_decel, autopoint, + FANCURVE_ATTR_DECEL, 2); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_decel, autopoint, + FANCURVE_ATTR_DECEL, 3); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_decel, autopoint, + FANCURVE_ATTR_DECEL, 4); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_decel, autopoint, + FANCURVE_ATTR_DECEL, 5); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_decel, autopoint, + FANCURVE_ATTR_DECEL, 6); +static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_decel, 
autopoint,
+			       FANCURVE_ATTR_DECEL, 7);
+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_decel, autopoint,
+			       FANCURVE_ATTR_DECEL, 8);
+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_decel, autopoint,
+			       FANCURVE_ATTR_DECEL, 9);
+//size
+static SENSOR_DEVICE_ATTR_2_RW(auto_points_size, autopoint, FANCURVE_SIZE, 0);
+
+static ssize_t minifancurve_show(struct device *dev,
+				 struct device_attribute *devattr, char *buf)
+{
+	bool value;
+	int err;
+	struct legion_private *priv = dev_get_drvdata(dev);
+
+	mutex_lock(&priv->fancurve_mutex);
+	err = ec_read_minifancurve(&priv->ecram, priv->conf, &value);
+	if (err) {
+		err = -1;
+		pr_info("Failed to read minifancurve\n");
+		goto error_unlock;
+	}
+	mutex_unlock(&priv->fancurve_mutex);
+	return sprintf(buf, "%d\n", value);
+
+error_unlock:
+	mutex_unlock(&priv->fancurve_mutex);
+	return -1;
+}
+
+static ssize_t minifancurve_store(struct device *dev,
+				  struct device_attribute *devattr,
+				  const char *buf, size_t count)
+{
+	int value;
+	int err;
+	struct legion_private *priv = dev_get_drvdata(dev);
+
+	err = kstrtoint(buf, 0, &value);
+	if (err) {
+		pr_info("Parsing hwmon store failed: error: %d\n", err);
+		err = -1;
+		goto error;
+	}
+
+	mutex_lock(&priv->fancurve_mutex);
+	err = ec_write_minifancurve(&priv->ecram, priv->conf, value);
+	if (err) {
+		err = -1;
+		pr_info("Failed to write minifancurve\n");
+		goto error_unlock;
+	}
+	mutex_unlock(&priv->fancurve_mutex);
+	return count;
+
+error_unlock:
+	mutex_unlock(&priv->fancurve_mutex);
+error:
+	return err;
+}
+
+static SENSOR_DEVICE_ATTR_RW(minifancurve, minifancurve, 0);
+
+static ssize_t pwm1_mode_show(struct device *dev,
+			      struct device_attribute *devattr, char *buf)
+{
+	bool value;
+	int err;
+	struct legion_private *priv = dev_get_drvdata(dev);
+
+	mutex_lock(&priv->fancurve_mutex);
+	err = ec_read_fanfullspeed(&priv->ecram, priv->conf, &value);
+	if (err) {
+		err = -1;
+		pr_info("Failed to read pwm1_mode/maximumfanspeed\n");
+		goto error_unlock;
+	}
+	mutex_unlock(&priv->fancurve_mutex);
+	return sprintf(buf, "%d\n", value ? 0 : 2);
+
+error_unlock:
+	mutex_unlock(&priv->fancurve_mutex);
+	return -1;
+}
+
+// TODO: remove? or use WMI method?
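+// Note on the mapping used by this driver: writing 0 to pwm1_mode enables
+// maximum fan speed and any other value disables it; reading reports 0 while
+// full speed is active and 2 otherwise (see pwm1_mode_show above).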
+static ssize_t pwm1_mode_store(struct device *dev, + struct device_attribute *devattr, + const char *buf, size_t count) +{ + int value; + int is_maximumfanspeed; + int err; + struct legion_private *priv = dev_get_drvdata(dev); + + err = kstrtoint(buf, 0, &value); + if (err) { + err = -1; + pr_info("Parsing hwmon store failed: error:%d\n", + err); + goto error; + } + is_maximumfanspeed = value == 0; + + mutex_lock(&priv->fancurve_mutex); + err = ec_write_fanfullspeed(&priv->ecram, priv->conf, + is_maximumfanspeed); + if (err) { + err = -1; + pr_info("Failed to write pwm1_mode/maximumfanspeed\n"); + goto error_unlock; + } + mutex_unlock(&priv->fancurve_mutex); + return count; + +error_unlock: + mutex_unlock(&priv->fancurve_mutex); +error: + return err; +} + +static SENSOR_DEVICE_ATTR_RW(pwm1_mode, pwm1_mode, 0); + +static struct attribute *fancurve_hwmon_attributes[] = { + &sensor_dev_attr_pwm1_auto_point1_pwm.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point2_pwm.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point3_pwm.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point4_pwm.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point5_pwm.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point6_pwm.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point7_pwm.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point8_pwm.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point9_pwm.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point10_pwm.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point1_pwm.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point2_pwm.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point3_pwm.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point4_pwm.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point5_pwm.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point6_pwm.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point7_pwm.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point8_pwm.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point9_pwm.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point10_pwm.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point1_temp.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point2_temp.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point3_temp.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point4_temp.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point5_temp.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point6_temp.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point7_temp.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point8_temp.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point9_temp.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point10_temp.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point1_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point2_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point3_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point4_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point5_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point6_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point7_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point8_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point9_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point10_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point1_temp.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point2_temp.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point3_temp.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point4_temp.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point5_temp.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point6_temp.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point7_temp.dev_attr.attr, + 
&sensor_dev_attr_pwm2_auto_point8_temp.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point9_temp.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point10_temp.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point1_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point2_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point3_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point4_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point5_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point6_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point7_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point8_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point9_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point10_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point1_temp.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point2_temp.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point3_temp.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point4_temp.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point5_temp.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point6_temp.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point7_temp.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point8_temp.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point9_temp.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point10_temp.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point1_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point2_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point3_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point4_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point5_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point6_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point7_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point8_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point9_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point10_temp_hyst.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point1_accel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point2_accel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point3_accel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point4_accel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point5_accel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point6_accel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point7_accel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point8_accel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point9_accel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point10_accel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point1_decel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point2_decel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point3_decel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point4_decel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point5_decel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point6_decel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point7_decel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point8_decel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point9_decel.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point10_decel.dev_attr.attr, + // + &sensor_dev_attr_auto_points_size.dev_attr.attr, + &sensor_dev_attr_minifancurve.dev_attr.attr, + &sensor_dev_attr_pwm1_mode.dev_attr.attr, NULL +}; + +static umode_t legion_hwmon_is_visible(struct kobject *kobj, + struct attribute *attr, int idx) +{ + bool supported = true; + struct device *dev = kobj_to_dev(kobj); + struct legion_private *priv = dev_get_drvdata(dev); + + if (attr == &sensor_dev_attr_minifancurve.dev_attr.attr) + supported = 
priv->conf->has_minifancurve;
+
+	supported = supported && (priv->conf->access_method_fancurve !=
+				  ACCESS_METHOD_NO_ACCESS);
+
+	return supported ? attr->mode : 0;
+}
+
+static const struct attribute_group legion_hwmon_sensor_group = {
+	.attrs = sensor_hwmon_attributes,
+	.is_visible = NULL
+};
+
+static const struct attribute_group legion_hwmon_fancurve_group = {
+	.attrs = fancurve_hwmon_attributes,
+	.is_visible = legion_hwmon_is_visible,
+};
+
+static const struct attribute_group *legion_hwmon_groups[] = {
+	&legion_hwmon_sensor_group, &legion_hwmon_fancurve_group, NULL
+};
+
+static int legion_hwmon_init(struct legion_private *priv)
+{
+	// TODO: use hwmon_device_register_with_groups or
+	// hwmon_device_register_with_info (the latter means all hwmon
+	// functions have to be changed); some laptop drivers do it one way,
+	// some the other.
+	// TODO: use devm_hwmon_device_register_with_groups? Some laptop
+	// drivers use this, some do not.
+	struct device *hwmon_dev = hwmon_device_register_with_groups(
+		&priv->platform_device->dev, "legion_hwmon", priv,
+		legion_hwmon_groups);
+	if (IS_ERR_OR_NULL(hwmon_dev)) {
+		pr_err("hwmon_device_register failed!\n");
+		return PTR_ERR(hwmon_dev);
+	}
+	dev_set_drvdata(hwmon_dev, priv);
+	priv->hwmon_dev = hwmon_dev;
+	return 0;
+}
+
+static void legion_hwmon_exit(struct legion_private *priv)
+{
+	pr_info("Unloading legion hwmon\n");
+	if (priv->hwmon_dev) {
+		hwmon_device_unregister(priv->hwmon_dev);
+		priv->hwmon_dev = NULL;
+	}
+	pr_info("Unloading legion hwmon done\n");
+}
+
+/* ACPI */
+
+static int acpi_init(struct legion_private *priv, struct acpi_device *adev)
+{
+	int err;
+	unsigned long cfg;
+	bool skip_acpi_sta_check;
+	struct device *dev = &priv->platform_device->dev;
+
+	priv->adev = adev;
+	if (!priv->adev) {
+		err = -ENODEV;
+		dev_info(dev, "Could not get ACPI handle\n");
+		goto err_acpi_init;
+	}
+
+	skip_acpi_sta_check = force || (!priv->conf->acpi_check_dev);
+	if (!skip_acpi_sta_check) {
+		err = eval_int(priv->adev->handle, "_STA", &cfg);
+		if (err) {
+			dev_info(dev, "Could not evaluate ACPI _STA\n");
+			goto err_acpi_init;
+		}
+
+		err = eval_int(priv->adev->handle, "VPC0._CFG", &cfg);
+		if (err) {
+			dev_info(dev, "Could not evaluate ACPI _CFG\n");
+			goto err_acpi_init;
+		}
+		dev_info(dev, "ACPI CFG: %lu\n", cfg);
+	} else {
+		dev_info(dev, "Skipping ACPI _STA check\n");
+	}
+
+	return 0;
+
+err_acpi_init:
+	return err;
+}
+
+/* ============================= */
+/* White Keyboard Backlight */
+/* ============================ */
+// In style of ideapad-driver and with code modified from ideapad-driver.
+ +static enum led_brightness +legion_kbd_bl_led_cdev_brightness_get(struct led_classdev *led_cdev) +{ + struct legion_private *priv = + container_of(led_cdev, struct legion_private, kbd_bl.led); + + return legion_kbd_bl_brightness_get(priv); +} + +static int legion_kbd_bl_led_cdev_brightness_set(struct led_classdev *led_cdev, + enum led_brightness brightness) +{ + struct legion_private *priv = + container_of(led_cdev, struct legion_private, kbd_bl.led); + + return legion_kbd_bl_brightness_set(priv, brightness); +} + +static int legion_kbd_bl_init(struct legion_private *priv) +{ + int brightness, err; + + if (WARN_ON(priv->kbd_bl.initialized)) { + pr_info("Keyboard backlight already initialized\n"); + return -EEXIST; + } + + if (priv->conf->access_method_keyboard == ACCESS_METHOD_NO_ACCESS) { + pr_info("Keyboard backlight handling disabled by this driver\n"); + return -ENODEV; + } + + brightness = legion_kbd_bl_brightness_get(priv); + if (brightness < 0) { + pr_info("Error reading keyboard brightness\n"); + return brightness; + } + + priv->kbd_bl.last_brightness = brightness; + + // will be renamed to "platform::kbd_backlight_1" if it exists already + priv->kbd_bl.led.name = "platform::" LED_FUNCTION_KBD_BACKLIGHT; + priv->kbd_bl.led.max_brightness = 2; + priv->kbd_bl.led.brightness_get = legion_kbd_bl_led_cdev_brightness_get; + priv->kbd_bl.led.brightness_set_blocking = + legion_kbd_bl_led_cdev_brightness_set; + priv->kbd_bl.led.flags = LED_BRIGHT_HW_CHANGED; + + err = led_classdev_register(&priv->platform_device->dev, + &priv->kbd_bl.led); + if (err) + return err; + + priv->kbd_bl.initialized = true; + + return 0; +} + +/** + * Deinit keyboard backlight. + * + * Can also be called if init was not successful. + * + */ +static void legion_kbd_bl_exit(struct legion_private *priv) +{ + if (!priv->kbd_bl.initialized) + return; + + priv->kbd_bl.initialized = false; + + led_classdev_unregister(&priv->kbd_bl.led); +} + +/* ============================= */ +/* Additional light driver */ +/* ============================ */ + +static enum led_brightness +legion_wmi_cdev_brightness_get(struct led_classdev *led_cdev) +{ + struct legion_private *priv = + container_of(led_cdev, struct legion_private, kbd_bl.led); + struct light *light_ins = container_of(led_cdev, struct light, led); + + return legion_wmi_light_get(priv, light_ins->light_id, + light_ins->lower_limit, + light_ins->upper_limit); +} + +static int legion_wmi_cdev_brightness_set(struct led_classdev *led_cdev, + enum led_brightness brightness) +{ + struct legion_private *priv = + container_of(led_cdev, struct legion_private, kbd_bl.led); + struct light *light_ins = container_of(led_cdev, struct light, led); + + return legion_wmi_light_set(priv, light_ins->light_id, + light_ins->lower_limit, + light_ins->upper_limit, brightness); +} + +static int legion_light_init(struct legion_private *priv, + struct light *light_ins, u8 light_id, + u8 lower_limit, u8 upper_limit, const char *name) +{ + int brightness, err; + + if (WARN_ON(light_ins->initialized)) { + pr_info("Light already initialized for light: %u\n", + light_ins->light_id); + return -EEXIST; + } + + light_ins->light_id = light_id; + light_ins->lower_limit = lower_limit; + light_ins->upper_limit = upper_limit; + + brightness = legion_wmi_light_get(priv, light_ins->light_id, + light_ins->lower_limit, + light_ins->upper_limit); + if (brightness < 0) { + pr_info("Error reading brightness for light: %u\n", + light_ins->light_id); + return brightness; + } + + light_ins->led.name = name; + 
light_ins->led.max_brightness = + light_ins->upper_limit - light_ins->lower_limit; + light_ins->led.brightness_get = legion_wmi_cdev_brightness_get; + light_ins->led.brightness_set_blocking = legion_wmi_cdev_brightness_set; + light_ins->led.flags = LED_BRIGHT_HW_CHANGED; + + err = led_classdev_register(&priv->platform_device->dev, + &light_ins->led); + if (err) + return err; + + light_ins->initialized = true; + + return 0; +} + +/** + * Deinit light. + * + * Can also be called if init was not successful. + * + */ +static void legion_light_exit(struct legion_private *priv, + struct light *light_ins) +{ + if (!light_ins->initialized) + return; + + light_ins->initialized = false; + + led_classdev_unregister(&light_ins->led); +} + +/* ============================= */ +/* Platform driver */ +/* ============================ */ + +static int legion_add(struct platform_device *pdev) +{ + struct legion_private *priv; + const struct dmi_system_id *dmi_sys; + int err; + u16 ec_read_id; + bool skip_ec_id_check; + bool is_ec_id_valid; + bool is_denied = true; + bool is_allowed = false; + bool do_load_by_list = false; + bool do_load = false; + //struct legion_private *priv = dev_get_drvdata(&pdev->dev); + dev_info(&pdev->dev, "legion_laptop platform driver probing\n"); + + dev_info( + &pdev->dev, + "Read identifying information: DMI_SYS_VENDOR: %s; DMI_PRODUCT_NAME: %s; DMI_BIOS_VERSION:%s\n", + dmi_get_system_info(DMI_SYS_VENDOR), + dmi_get_system_info(DMI_PRODUCT_NAME), + dmi_get_system_info(DMI_BIOS_VERSION)); + + // TODO: allocate? + priv = &_priv; + priv->platform_device = pdev; + err = legion_shared_init(priv); + if (err) { + dev_info(&pdev->dev, "legion_laptop is forced to load.\n"); + goto err_legion_shared_init; + } + dev_set_drvdata(&pdev->dev, priv); + + // TODO: remove + pr_info("Read identifying information: DMI_SYS_VENDOR: %s; DMI_PRODUCT_NAME: %s; DMI_BIOS_VERSION:%s\n", + dmi_get_system_info(DMI_SYS_VENDOR), + dmi_get_system_info(DMI_PRODUCT_NAME), + dmi_get_system_info(DMI_BIOS_VERSION)); + + dmi_sys = dmi_first_match(optimistic_allowlist); + is_allowed = dmi_sys != NULL; + is_denied = dmi_check_system(denylist); + do_load_by_list = is_allowed && !is_denied; + do_load = do_load_by_list || force; + + dev_info( + &pdev->dev, + "is_denied: %d; is_allowed: %d; do_load_by_list: %d; do_load: %d\n", + is_denied, is_allowed, do_load_by_list, do_load); + + if (!(do_load)) { + dev_info( + &pdev->dev, + "Module not usable for this laptop because it is not in allowlist. 
Notify the maintainer if you want to add your device or force load with param force.\n"); + err = -ENOMEM; + goto err_model_mismtach; + } + + if (force) + dev_info(&pdev->dev, "legion_laptop is forced to load.\n"); + + if (!do_load_by_list && do_load) { + dev_info( + &pdev->dev, + "legion_laptop is forced to load and would otherwise not be loaded\n"); + } + + // if forced and no module found, use config for first model + if (dmi_sys == NULL) + dmi_sys = &optimistic_allowlist[0]; + dev_info(&pdev->dev, "Using configuration for system: %s\n", + dmi_sys->ident); + + priv->conf = dmi_sys->driver_data; + + err = acpi_init(priv, ACPI_COMPANION(&pdev->dev)); + if (err) { + dev_info(&pdev->dev, "Could not init ACPI access: %d\n", err); + goto err_acpi_init; + } + + // TODO: remove; only used for reverse engineering + pr_info("Creating RAM access to embedded controller\n"); + err = ecram_memoryio_init(&priv->ec_memoryio, + priv->conf->ramio_physical_start, 0, + priv->conf->ramio_size); + if (err) { + dev_info( + &pdev->dev, + "Could not init RAM access to embedded controller: %d\n", + err); + goto err_ecram_memoryio_init; + } + + err = ecram_init(&priv->ecram, priv->conf->memoryio_physical_ec_start, + priv->conf->memoryio_size); + if (err) { + dev_info(&pdev->dev, + "Could not init access to embedded controller: %d\n", + err); + goto err_ecram_init; + } + + ec_read_id = read_ec_id(&priv->ecram, priv->conf); + dev_info(&pdev->dev, "Read embedded controller ID 0x%x\n", ec_read_id); + skip_ec_id_check = force || (!priv->conf->check_embedded_controller_id); + is_ec_id_valid = skip_ec_id_check || + (ec_read_id == priv->conf->embedded_controller_id); + if (!is_ec_id_valid) { + err = -ENOMEM; + dev_info(&pdev->dev, "Expected EC chip id 0x%x but read 0x%x\n", + priv->conf->embedded_controller_id, ec_read_id); + goto err_ecram_id; + } + if (skip_ec_id_check) { + dev_info(&pdev->dev, + "Skipped checking embedded controller id\n"); + } + + dev_info(&pdev->dev, "Creating debugfs interface\n"); + legion_debugfs_init(priv); + + pr_info("Creating sysfs interface\n"); + err = legion_sysfs_init(priv); + if (err) { + dev_info(&pdev->dev, "Failed to create sysfs interface: %d\n", + err); + goto err_sysfs_init; + } + + pr_info("Creating hwmon interface"); + err = legion_hwmon_init(priv); + if (err) { + dev_info(&pdev->dev, "Failed to create hwmon interface: %d\n", + err); + goto err_hwmon_init; + } + + pr_info("Creating platform profile support\n"); + err = legion_platform_profile_init(priv); + if (err) { + dev_info(&pdev->dev, "Failed to create platform profile: %d\n", + err); + goto err_platform_profile; + } + + pr_info("Init WMI driver support\n"); + err = legion_wmi_init(); + if (err) { + dev_info(&pdev->dev, "Failed to init WMI driver: %d\n", err); + goto err_wmi; + } + + pr_info("Init keyboard backlight LED driver\n"); + err = legion_kbd_bl_init(priv); + if (err) { + dev_info( + &pdev->dev, + "Failed to init keyboard backlight LED driver. Skipping ...\n"); + } + + pr_info("Init Y-Logo LED driver\n"); + err = legion_light_init(priv, &priv->ylogo_light, LIGHT_ID_YLOGO, 0, 1, + "platform::ylogo"); + if (err) { + dev_info(&pdev->dev, + "Failed to init Y-Logo LED driver. Skipping ...\n"); + } + + pr_info("Init IO-Port LED driver\n"); + err = legion_light_init(priv, &priv->iport_light, LIGHT_ID_IOPORT, 1, 2, + "platform::ioport"); + if (err) { + dev_info(&pdev->dev, + "Failed to init IO-Port LED driver. 
Skipping ...\n"); + } + + dev_info(&pdev->dev, "legion_laptop loaded for this device\n"); + return 0; + + // TODO: remove eventually + legion_light_exit(priv, &priv->iport_light); + legion_light_exit(priv, &priv->ylogo_light); + legion_kbd_bl_exit(priv); + legion_wmi_exit(); +err_wmi: + legion_platform_profile_exit(priv); +err_platform_profile: + legion_hwmon_exit(priv); +err_hwmon_init: + legion_sysfs_exit(priv); +err_sysfs_init: + legion_debugfs_exit(priv); +err_ecram_id: + ecram_exit(&priv->ecram); +err_ecram_init: + ecram_memoryio_exit(&priv->ec_memoryio); +err_ecram_memoryio_init: +err_acpi_init: + legion_shared_exit(priv); +err_legion_shared_init: +err_model_mismtach: + dev_info(&pdev->dev, "legion_laptop not loaded for this device\n"); + return err; +} + +static int legion_remove(struct platform_device *pdev) +{ + struct legion_private *priv = dev_get_drvdata(&pdev->dev); + + mutex_lock(&legion_shared_mutex); + priv->loaded = false; + mutex_unlock(&legion_shared_mutex); + + legion_light_exit(priv, &priv->iport_light); + legion_light_exit(priv, &priv->ylogo_light); + legion_kbd_bl_exit(priv); + // first unregister wmi, so toggling powermode does not + // generate events anymore that even might be delayed + legion_wmi_exit(); + legion_platform_profile_exit(priv); + + // toggle power mode to load default setting from embedded controller + // again + toggle_powermode(priv); + + legion_hwmon_exit(priv); + legion_sysfs_exit(priv); + legion_debugfs_exit(priv); + ecram_exit(&priv->ecram); + ecram_memoryio_exit(&priv->ec_memoryio); + legion_shared_exit(priv); + + pr_info("Legion platform unloaded\n"); + return 0; +} + +static int legion_resume(struct platform_device *pdev) +{ + //struct legion_private *priv = dev_get_drvdata(&pdev->dev); + dev_info(&pdev->dev, "Resumed in legion-laptop\n"); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int legion_pm_resume(struct device *dev) +{ + //struct legion_private *priv = dev_get_drvdata(dev); + dev_info(dev, "Resumed PM in legion-laptop\n"); + + return 0; +} +#endif +static SIMPLE_DEV_PM_OPS(legion_pm, NULL, legion_pm_resume); + +// same as ideapad +static const struct acpi_device_id legion_device_ids[] = { + // todo: change to "VPC2004", and also ACPI paths + { "PNP0C09", 0 }, + { "", 0 }, +}; +MODULE_DEVICE_TABLE(acpi, legion_device_ids); + +static struct platform_driver legion_driver = { + .probe = legion_add, + .remove = legion_remove, + .resume = legion_resume, + .driver = { + .name = "legion", + .pm = &legion_pm, + .acpi_match_table = ACPI_PTR(legion_device_ids), + }, +}; + +static int __init legion_init(void) +{ + int err; + + pr_info("Loading legion_laptop\n"); + err = platform_driver_register(&legion_driver); + if (err) { + pr_info("legion_laptop: platform_driver_register failed\n"); + return err; + } + + return 0; +} + +module_init(legion_init); + +static void __exit legion_exit(void) +{ + platform_driver_unregister(&legion_driver); + pr_info("legion_laptop exit\n"); +} + +module_exit(legion_exit); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 2df35e65557d..a52bd9f4b632 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1268,7 +1268,7 @@ struct readahead_control { ._index = i, \ } -#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) +#define VM_READAHEAD_PAGES (SZ_8M / PAGE_SIZE) void page_cache_ra_unbounded(struct readahead_control *, unsigned long nr_to_read, unsigned long lookahead_count); diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 
6030a8235617..60b7fe5fa74a 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -156,6 +156,8 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns, #ifdef CONFIG_USER_NS +extern int unprivileged_userns_clone; + static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) @@ -189,6 +191,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns); struct ns_common *ns_get_owner(struct ns_common *ns); #else +#define unprivileged_userns_clone 0 + static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { return &init_user_ns; diff --git a/init/Kconfig b/init/Kconfig index c8a155cf7209..a63f2024953b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -132,6 +132,10 @@ config THREAD_INFO_IN_TASK menu "General setup" +config CACHY + bool "Some kernel tweaks by CachyOS" + default y + config BROKEN bool @@ -1247,6 +1251,22 @@ config USER_NS If unsure, say N. +config USER_NS_UNPRIVILEGED + bool "Allow unprivileged users to create namespaces" + default y + depends on USER_NS + help + When disabled, unprivileged users will not be able to create + new namespaces. Allowing users to create their own namespaces + has been part of several recent local privilege escalation + exploits, so if you need user namespaces but are + paranoid^Wsecurity-conscious you want to disable this. + + This setting can be overridden at runtime via the + kernel.unprivileged_userns_clone sysctl. + + If unsure, say Y. + config PID_NS bool "PID Namespaces" default y @@ -1389,6 +1409,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE with the "-O2" compiler flag for best performance and most helpful compile-time warnings. +config CC_OPTIMIZE_FOR_PERFORMANCE_O3 + bool "Optimize more for performance (-O3)" + help + Choosing this option will pass "-O3" to your compiler to optimize + the kernel yet more for performance. + config CC_OPTIMIZE_FOR_SIZE bool "Optimize for size (-Os)" help diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz index 38ef6d06888e..0f78364efd4f 100644 --- a/kernel/Kconfig.hz +++ b/kernel/Kconfig.hz @@ -40,6 +40,27 @@ choice on SMP and NUMA systems and exactly dividing by both PAL and NTSC frame rates for video and multimedia work. + config HZ_500 + bool "500 HZ" + help + 500 Hz is a balanced timer frequency. Provides fast interactivity + on desktops with good smoothness without increasing CPU power + consumption and sacrificing the battery life on laptops. + + config HZ_600 + bool "600 HZ" + help + 600 Hz is a balanced timer frequency. Provides fast interactivity + on desktops with good smoothness without increasing CPU power + consumption and sacrificing the battery life on laptops. + + config HZ_750 + bool "750 HZ" + help + 750 Hz is a balanced timer frequency. Provides fast interactivity + on desktops with good smoothness without increasing CPU power + consumption and sacrificing the battery life on laptops. 
+ config HZ_1000 bool "1000 HZ" help @@ -53,6 +74,9 @@ config HZ default 100 if HZ_100 default 250 if HZ_250 default 300 if HZ_300 + default 500 if HZ_500 + default 600 if HZ_600 + default 750 if HZ_750 default 1000 if HZ_1000 config SCHED_HRTICK diff --git a/kernel/fork.c b/kernel/fork.c index 3b9cdb42e757..cda4ff6351f0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -102,6 +102,10 @@ #include #include +#ifdef CONFIG_USER_NS +#include +#endif + #include #include #include @@ -2261,6 +2265,10 @@ __latent_entropy struct task_struct *copy_process( if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) return ERR_PTR(-EINVAL); + if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. @@ -3407,6 +3415,12 @@ int ksys_unshare(unsigned long unshare_flags) if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; + if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { + err = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto bad_unshare_out; + } + err = check_unshare_flags(unshare_flags); if (err) goto bad_unshare_out; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index aee5e7a70170..083a039d3433 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -73,10 +73,19 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; * * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) */ +#ifdef CONFIG_CACHY +unsigned int sysctl_sched_base_slice = 350000ULL; +static unsigned int normalized_sysctl_sched_base_slice = 350000ULL; +#else unsigned int sysctl_sched_base_slice = 750000ULL; static unsigned int normalized_sysctl_sched_base_slice = 750000ULL; +#endif +#ifdef CONFIG_CACHY +const_debug unsigned int sysctl_sched_migration_cost = 300000UL; +#else const_debug unsigned int sysctl_sched_migration_cost = 500000UL; +#endif int sched_thermal_decay_shift; static int __init setup_sched_thermal_decay_shift(char *str) @@ -127,8 +136,12 @@ int __weak arch_asym_cpu_priority(int cpu) * * (default: 5 msec, units: microseconds) */ +#ifdef CONFIG_CACHY +static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; +#else static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; #endif +#endif #ifdef CONFIG_NUMA_BALANCING /* Restrict the NUMA promotion throughput (MB/s) for each target node. 
*/ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 8c817d0a92f3..ce68f50e76f5 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2544,7 +2544,7 @@ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags); extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags); -#ifdef CONFIG_PREEMPT_RT +#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_CACHY) #define SCHED_NR_MIGRATE_BREAK 8 #else #define SCHED_NR_MIGRATE_BREAK 32 diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 157f7ce2942d..c92d8a4b23fb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -95,6 +95,9 @@ EXPORT_SYMBOL_GPL(sysctl_long_vals); #ifdef CONFIG_PERF_EVENTS static const int six_hundred_forty_kb = 640 * 1024; #endif +#ifdef CONFIG_USER_NS +#include +#endif static const int ngroups_max = NGROUPS_MAX; @@ -1623,6 +1626,15 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_USER_NS + { + .procname = "unprivileged_userns_clone", + .data = &unprivileged_userns_clone, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #ifdef CONFIG_PROC_SYSCTL { .procname = "tainted", diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index ce4d99df5f0e..8272e2e359f1 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -22,6 +22,13 @@ #include #include +/* sysctl */ +#ifdef CONFIG_USER_NS_UNPRIVILEGED +int unprivileged_userns_clone = 1; +#else +int unprivileged_userns_clone; +#endif + static struct kmem_cache *user_ns_cachep __ro_after_init; static DEFINE_MUTEX(userns_state_mutex); diff --git a/mm/Kconfig b/mm/Kconfig index ffc3a2ba3a8c..0e440573033c 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -630,7 +630,7 @@ config COMPACTION config COMPACT_UNEVICTABLE_DEFAULT int depends on COMPACTION - default 0 if PREEMPT_RT + default 0 if PREEMPT_RT || CACHY default 1 # diff --git a/mm/compaction.c b/mm/compaction.c index b961db601df4..91d627e8a93d 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1830,7 +1830,11 @@ static int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNE * aggressively the kernel should compact memory in the * background. It takes values in the range [0, 100]. */ +#ifdef CONFIG_CACHY +static unsigned int __read_mostly sysctl_compaction_proactiveness; +#else static unsigned int __read_mostly sysctl_compaction_proactiveness = 20; +#endif static int sysctl_extfrag_threshold = 500; static int __read_mostly sysctl_compact_memory; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 6790f93fda45..d601fe3a7285 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -62,7 +62,11 @@ unsigned long transparent_hugepage_flags __read_mostly = #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE (1<lock, flags); for (i = 0; i < count; ++i) { @@ -2134,6 +2139,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, if (unlikely(page == NULL)) break; + /* Reschedule and ease the contention on the lock if needed */ + if (i + 1 < count && ((can_resched && need_resched()) || + spin_needbreak(&zone->lock))) { + __mod_zone_page_state(zone, NR_FREE_PAGES, + -((i + 1 - last_mod) << order)); + last_mod = i + 1; + spin_unlock_irqrestore(&zone->lock, flags); + if (can_resched) + cond_resched(); + spin_lock_irqsave(&zone->lock, flags); + } + /* * Split buddy pages returned by expand() are received here in * physical page order. 
The page is added to the tail of @@ -2150,7 +2167,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, -(1 << order)); } - __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); + __mod_zone_page_state(zone, NR_FREE_PAGES, -((i - last_mod) << order)); spin_unlock_irqrestore(&zone->lock, flags); return i; diff --git a/mm/swap.c b/mm/swap.c index cd8f0150ba3a..42c405a4f114 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -1090,6 +1090,10 @@ void folio_batch_remove_exceptionals(struct folio_batch *fbatch) */ void __init swap_setup(void) { +#ifdef CONFIG_CACHY + /* Only swap-in pages requested, avoid readahead */ + page_cluster = 0; +#else unsigned long megs = totalram_pages() >> (20 - PAGE_SHIFT); /* Use a smaller cluster for small-memory machines */ @@ -1101,4 +1105,5 @@ void __init swap_setup(void) * Right now other parts of the system means that we * _really_ don't want to cluster much more */ +#endif } diff --git a/mm/vmpressure.c b/mm/vmpressure.c index bd5183dfd879..3a410f53a07c 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -43,7 +43,11 @@ static const unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16; * essence, they are percents: the higher the value, the more number * unsuccessful reclaims there were. */ +#ifdef CONFIG_CACHY +static const unsigned int vmpressure_level_med = 65; +#else static const unsigned int vmpressure_level_med = 60; +#endif static const unsigned int vmpressure_level_critical = 95; /* diff --git a/mm/vmscan.c b/mm/vmscan.c index 4255619a1a31..5a3fbaf34158 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -185,7 +185,11 @@ struct scan_control { /* * From 0 .. 200. Higher means more swappy. */ +#ifdef CONFIG_CACHY +int vm_swappiness = 20; +#else int vm_swappiness = 60; +#endif #ifdef CONFIG_MEMCG @@ -3922,7 +3926,11 @@ static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc } /* to protect the working set of the last N jiffies */ +#ifdef CONFIG_CACHY +static unsigned long lru_gen_min_ttl __read_mostly = 1000; +#else static unsigned long lru_gen_min_ttl __read_mostly; +#endif static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) { diff --git a/sound/soc/amd/acp/acp-mach-common.c b/sound/soc/amd/acp/acp-mach-common.c index 504d1b8c4cbb..3411e8d9aab3 100644 --- a/sound/soc/amd/acp/acp-mach-common.c +++ b/sound/soc/amd/acp/acp-mach-common.c @@ -26,6 +26,7 @@ #include "../../codecs/rt5682s.h" #include "../../codecs/nau8825.h" #include "../../codecs/nau8821.h" +#include "../../codecs/cs35l41.h" #include "acp-mach.h" #define PCO_PLAT_CLK 48000000 @@ -1282,6 +1283,78 @@ SND_SOC_DAILINK_DEF(nau8821, DAILINK_COMP_ARRAY(COMP_CODEC("i2c-NVTN2020:00", "nau8821-hifi"))); +static int acp_cs35l41_init(struct snd_soc_pcm_runtime *rtd) +{ + return 0; +} + +static int acp_cs35l41_startup(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + + runtime->hw.channels_max = DUAL_CHANNEL; + snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, + &constraints_channels); + snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, + &constraints_rates); + return 0; +} + +static int acp_cs35l41_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ + struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); + struct snd_soc_card *card = rtd->card; + struct snd_soc_dai *codec_dai; + int ret, i; + unsigned int num_codecs = rtd->dai_link->num_codecs; + unsigned int bclk_val; + + ret = 0; + for (i = 0; i < num_codecs; i++) 
{ + codec_dai = snd_soc_rtd_to_codec(rtd, i); + if (strcmp(codec_dai->name, "cs35l41-pcm") == 0) { + switch (params_rate(params)) { + case 48000: + bclk_val = 1536000; + break; + default: + dev_err(card->dev, "Invalid Samplerate:0x%x\n", + params_rate(params)); + return -EINVAL; + } + ret = snd_soc_component_set_sysclk(codec_dai->component, + 0, 0, bclk_val, SND_SOC_CLOCK_IN); + if (ret < 0) { + dev_err(card->dev, "failed to set sysclk for CS35l41 dai\n"); + return ret; + } + } + } + + return ret; +} + +static struct snd_soc_codec_conf cs35l41_conf[] = { + { + .dlc = COMP_CODEC_CONF("spi-VLV1776:00"), + .name_prefix = "Left", + }, + { + .dlc = COMP_CODEC_CONF("spi-VLV1776:01"), + .name_prefix = "Right", + }, +}; + +static const struct snd_soc_ops acp_cs35l41_ops = { + .startup = acp_cs35l41_startup, + .hw_params = acp_cs35l41_hw_params, +}; + +SND_SOC_DAILINK_DEF(cs35l41, + DAILINK_COMP_ARRAY(COMP_CODEC("spi-VLV1776:00", "cs35l41-pcm"), + COMP_CODEC("spi-VLV1776:01", "cs35l41-pcm"))); + /* Declare DMIC codec components */ SND_SOC_DAILINK_DEF(dmic_codec, DAILINK_COMP_ARRAY(COMP_CODEC("dmic-codec", "dmic-hifi"))); @@ -1481,6 +1554,7 @@ int acp_sofdsp_dai_links_create(struct snd_soc_card *card) links[i].platforms = sof_component; links[i].num_platforms = ARRAY_SIZE(sof_component); links[i].dpcm_playback = 1; + links[i].dpcm_capture = 1; links[i].nonatomic = true; links[i].no_pcm = 1; if (!drv_data->amp_codec_id) { @@ -1513,6 +1587,7 @@ int acp_sofdsp_dai_links_create(struct snd_soc_card *card) links[i].platforms = sof_component; links[i].num_platforms = ARRAY_SIZE(sof_component); links[i].dpcm_playback = 1; + links[i].dpcm_capture = 1; links[i].nonatomic = true; links[i].no_pcm = 1; if (!drv_data->amp_codec_id) { @@ -1597,6 +1672,8 @@ int acp_legacy_dai_links_create(struct snd_soc_card *card) if (drv_data->hs_cpu_id) num_links++; + if (drv_data->bt_cpu_id) + num_links++; if (drv_data->amp_cpu_id) num_links++; if (drv_data->dmic_cpu_id) @@ -1744,6 +1821,33 @@ int acp_legacy_dai_links_create(struct snd_soc_card *card) card->codec_conf = rt1019_conf; card->num_configs = ARRAY_SIZE(rt1019_conf); } + if (drv_data->amp_codec_id == CS35L41) { + links[i].codecs = cs35l41; + links[i].num_codecs = ARRAY_SIZE(cs35l41); + links[i].init = acp_cs35l41_init; + card->codec_conf = cs35l41_conf; + card->num_configs = ARRAY_SIZE(cs35l41_conf); + links[i].ops = &acp_cs35l41_ops; + } + i++; + } + + if (drv_data->bt_cpu_id == I2S_BT) { + links[i].name = "acp-bt-codec"; + links[i].id = BT_BE_ID; + links[i].cpus = sof_bt; + links[i].num_cpus = ARRAY_SIZE(sof_bt); + links[i].platforms = sof_component; + links[i].num_platforms = ARRAY_SIZE(sof_component); + links[i].dpcm_playback = 1; + links[i].dpcm_capture = 1; + links[i].nonatomic = true; + links[i].no_pcm = 1; + if (!drv_data->bt_codec_id) { + /* Use dummy codec if codec id not specified */ + links[i].codecs = &snd_soc_dummy_dlc; + links[i].num_codecs = 1; + } i++; } diff --git a/sound/soc/amd/acp/acp-mach.h b/sound/soc/amd/acp/acp-mach.h index a48546d8d407..1d38025307b5 100644 --- a/sound/soc/amd/acp/acp-mach.h +++ b/sound/soc/amd/acp/acp-mach.h @@ -27,8 +27,8 @@ enum be_id { HEADSET_BE_ID = 0, AMP_BE_ID, - DMIC_BE_ID, BT_BE_ID, + DMIC_BE_ID, }; enum cpu_endpoints { @@ -46,6 +46,7 @@ enum codec_endpoints { MAX98360A, RT5682S, NAU8825, + CS35L41, NAU8821, MAX98388, ES83XX, diff --git a/sound/soc/codecs/max98388.c b/sound/soc/codecs/max98388.c index 078adec29312..d8974805b5f8 100644 --- a/sound/soc/codecs/max98388.c +++ b/sound/soc/codecs/max98388.c @@ -390,27 
+390,43 @@ static void max98388_reset(struct max98388_priv *max98388, struct device *dev) { int ret, reg, count; + /* Software Reset */ ret = regmap_update_bits(max98388->regmap, MAX98388_R2000_SW_RESET, MAX98388_SOFT_RESET, MAX98388_SOFT_RESET); - if (ret) + + if (ret) { dev_err(dev, "Reset command failed. (ret:%d)\n", ret); + goto exit; + } + count = 0; while (count < 3) { usleep_range(10000, 11000); + /* Software Reset Verification */ ret = regmap_read(max98388->regmap, MAX98388_R22FF_REV_ID, ®); + if (!ret) { dev_info(dev, "Reset completed (retry:%d)\n", count); - return; + goto exit; } count++; } + dev_err(dev, "Reset failed. (ret:%d)\n", ret); + + +exit: + regcache_cache_only(max98388->regmap, true); + ret = regmap_update_bits(max98388->regmap, + MAX98388_R2000_SW_RESET, + MAX98388_SOFT_RESET, 0); + regcache_cache_only(max98388->regmap, false); } static int max98388_probe(struct snd_soc_component *component) @@ -419,6 +435,7 @@ static int max98388_probe(struct snd_soc_component *component) /* Software Reset */ max98388_reset(max98388, component->dev); + usleep_range(400, 1000); /* General channel source configuration */ regmap_write(max98388->regmap, @@ -811,6 +828,7 @@ static bool max98388_readable_register(struct device *dev, case MAX98388_R210E_AUTO_RESTART: case MAX98388_R210F_GLOBAL_EN: case MAX98388_R22FF_REV_ID: + case MAX98388_R2000_SW_RESET: return true; default: return false; @@ -823,6 +841,7 @@ static bool max98388_volatile_reg(struct device *dev, unsigned int reg) case MAX98388_R2001_INT_RAW1 ... MAX98388_R2005_INT_STATE2: case MAX98388_R210F_GLOBAL_EN: case MAX98388_R22FF_REV_ID: + case MAX98388_R2000_SW_RESET: return true; default: return false; @@ -866,6 +885,7 @@ static int max98388_resume(struct device *dev) regcache_cache_only(max98388->regmap, false); max98388_reset(max98388, dev); + usleep_range(400, 1000); regcache_sync(max98388->regmap); return 0; -- 2.45.0 From 6ac41dbad8749bca3d55f0fdeb26cc82a2b29380 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Sat, 27 Apr 2024 20:13:10 +0200 Subject: [PATCH 6/9] fixes Signed-off-by: Peter Jung --- .../ABI/testing/sysfs-driver-hid-asus | 85 + arch/Kconfig | 4 +- drivers/hid/Makefile | 2 + drivers/hid/{hid-asus.c => hid-asus-core.c} | 193 +-- drivers/hid/hid-asus-rog.c | 1468 +++++++++++++++++ drivers/hid/hid-asus-rog.h | 482 ++++++ drivers/hid/hid-asus.h | 58 + drivers/hid/hid-ids.h | 1 + 8 files changed, 2174 insertions(+), 119 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-driver-hid-asus rename drivers/hid/{hid-asus.c => hid-asus-core.c} (89%) create mode 100644 drivers/hid/hid-asus-rog.c create mode 100644 drivers/hid/hid-asus-rog.h create mode 100644 drivers/hid/hid-asus.h diff --git a/Documentation/ABI/testing/sysfs-driver-hid-asus b/Documentation/ABI/testing/sysfs-driver-hid-asus new file mode 100644 index 000000000000..df5b0c5b0702 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-hid-asus @@ -0,0 +1,85 @@ +What: /sys/bus/usb/devices/1-3:1.0/0003:0B05:1ABE.0001/gamepad_mode +Date: December 2023 +Contact: linux-input@vger.kernel.org +Description: Set the mode the ROG Ally xpad operates in: + - 1 = Game mode + - 2 = WASD mode + - 3 = Mouse mode + This setting applies instantly and applies settings that were previously changed + under that mode which are: + - deadzones + - anti-deadzones + - button mapping + - button turbo settings + - response curves + +What: /sys/bus/usb/devices/1-3:1.0/0003:0B05:1ABE.0001/apply +Date: December 2023 +Contact: linux-input@vger.kernel.org +Description: Apply 
the settings that have been stored in attributes so far. Because there are + many individual settings across a dozen packets this separation is required to + prevent spamming the MCU when userspace applications apply many changes at once. + +What: /sys/bus/usb/devices/1-3:1.0/0003:0B05:1ABE.0001/reset_btn_mapping +Date: December 2023 +Contact: linux-input@vger.kernel.org +Description: Reset a gamepad mode to its default button mapping. + +What: /sys/bus/usb/devices/1-3:1.0/0003:0B05:1ABE.0001/axis__/deadzone +Date: December 2023 +Contact: linux-input@vger.kernel.org +Description: Set the inner and outer deadzones of joysticks and triggers. These settings are not + written to the MCU until `apply` is set. + - range 0-64 (corresponds to 0-100%) + +What: /sys/bus/usb/devices/1-3:1.0/0003:0B05:1ABE.0001/axis__/deadzone_index +Date: December 2023 +Contact: linux-input@vger.kernel.org +Description: Descriptive labels for joystick deadzone array. + +What: /sys/bus/usb/devices/1-3:1.0/0003:0B05:1ABE.0001/axis__/anti-deadzone +Date: December 2023 +Contact: linux-input@vger.kernel.org +Description: Set the joystick anti-deadzone feature: + - range 0-32 (corresponds to 0-50%) + +What: /sys/bus/usb/devices/1-3:1.0/0003:0B05:1ABE.0001/axis__/calibration +Date: December 2023 +Contact: linux-input@vger.kernel.org +Description: Calibration values for the joysticks and trigger analogues. There are no default + values as the calibration is determined in userspace. + +What: /sys/bus/usb/devices/1-3:1.0/0003:0B05:1ABE.0001/axis__/calibration_index +Date: December 2023 +Contact: linux-input@vger.kernel.org +Description: Descriptive labels for joystick and triggers calibration array. + +What: /sys/bus/usb/devices/1-3:1.0/0003:0B05:1ABE.0001/axis__/rc_point +Date: December 2023 +Contact: linux-input@vger.kernel.org +Description: Set the joystick response curve. There are 4 points available with 1 being the lowest + point and 4 being the highest point. + - range 0-64 (corresponds to 0-100%) + +What: /sys/bus/usb/devices/1-3:1.0/0003:0B05:1ABE.0001/axis__/rc_point_index +Date: December 2023 +Contact: linux-input@vger.kernel.org +Description: Descriptive labels for joystick response curve points. + +What: /sys/bus/usb/devices/1-3:1.0/0003:0B05:1ABE.0001/btn_