diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst index c18d94fa64704..f8ebb63b6c5d2 100644 --- a/Documentation/admin-guide/kdump/vmcoreinfo.rst +++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst @@ -624,3 +624,9 @@ Used to get the correct ranges: * VMALLOC_START ~ VMALLOC_END : vmalloc() / ioremap() space. * VMEMMAP_START ~ VMEMMAP_END : vmemmap space, used for struct page array. * KERNEL_LINK_ADDR : start address of Kernel link and BPF + +va_kernel_pa_offset +------------------- + +Indicates the offset between the kernel virtual and physical mappings. +Used to translate virtual to physical addresses. diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index cd46e2b20a814..3ce6e4aebdef6 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -143,6 +143,10 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-500 | #841119,826419 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | MMU-600 | #1076982,1209401| N/A | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | MMU-700 | #2268618,2812531| N/A | ++----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/Makefile b/Makefile index 5547e02f6104a..bf463afef54bf 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 4 -SUBLEVEL = 9 +SUBLEVEL = 10 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth diff --git a/arch/arm/boot/dts/at91-qil_a9260.dts b/arch/arm/boot/dts/at91-qil_a9260.dts index 9d26f99963483..5ccb3c139592d 100644 --- a/arch/arm/boot/dts/at91-qil_a9260.dts +++ b/arch/arm/boot/dts/at91-qil_a9260.dts @@ -108,7 +108,7 @@ status = "okay"; }; - shdwc@fffffd10 { + shdwc: poweroff@fffffd10 { atmel,wakeup-counter = <10>; atmel,wakeup-rtt-timer; }; diff --git a/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts b/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts index 52ddd0571f1c0..d0a6dbd377dfa 100644 --- a/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts +++ b/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts @@ -139,7 +139,7 @@ }; }; - shdwc@f8048010 { + poweroff@f8048010 { debounce-delay-us = <976>; atmel,wakeup-rtc-timer; diff --git a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts index bf1c9ca72a9f3..200b20515ab12 100644 --- a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts +++ b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts @@ -204,7 +204,7 @@ }; }; - shdwc@f8048010 { + poweroff@f8048010 { debounce-delay-us = <976>; input@0 { diff --git a/arch/arm/boot/dts/at91-sama5d2_xplained.dts b/arch/arm/boot/dts/at91-sama5d2_xplained.dts index 2d53c47d7cc86..6680031387e8c 100644 --- a/arch/arm/boot/dts/at91-sama5d2_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d2_xplained.dts @@ -348,7 +348,7 @@ }; }; - shdwc@f8048010 { + poweroff@f8048010 { debounce-delay-us = <976>; atmel,wakeup-rtc-timer; diff --git a/arch/arm/boot/dts/at91rm9200.dtsi b/arch/arm/boot/dts/at91rm9200.dtsi index 6f9004ebf4245..37b500f6f3956 100644 --- a/arch/arm/boot/dts/at91rm9200.dtsi +++ b/arch/arm/boot/dts/at91rm9200.dtsi @@ -102,7 +102,7 @@ reg = <0xffffff00 0x100>; }; - pmc: pmc@fffffc00 { + pmc: clock-controller@fffffc00 { compatible = "atmel,at91rm9200-pmc", "syscon"; reg = <0xfffffc00 0x100>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi index 789fe356dbf60..35a007365b6a5 100644 --- a/arch/arm/boot/dts/at91sam9260.dtsi +++ b/arch/arm/boot/dts/at91sam9260.dtsi @@ -115,7 +115,7 @@ reg = <0xffffee00 0x200>; }; - pmc: pmc@fffffc00 { + pmc: clock-controller@fffffc00 { compatible = "atmel,at91sam9260-pmc", "syscon"; reg = <0xfffffc00 0x100>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; @@ -130,7 +130,7 @@ clocks = <&pmc PMC_TYPE_CORE PMC_SLOW>; }; - shdwc@fffffd10 { + shdwc: poweroff@fffffd10 { compatible = "atmel,at91sam9260-shdwc"; reg = <0xfffffd10 0x10>; clocks = <&pmc PMC_TYPE_CORE PMC_SLOW>; diff --git a/arch/arm/boot/dts/at91sam9260ek.dts b/arch/arm/boot/dts/at91sam9260ek.dts index bb72f050a4fef..720c15472c4a5 100644 --- a/arch/arm/boot/dts/at91sam9260ek.dts +++ b/arch/arm/boot/dts/at91sam9260ek.dts @@ -112,7 +112,7 @@ }; }; - shdwc@fffffd10 { + shdwc: poweroff@fffffd10 { atmel,wakeup-counter = <10>; atmel,wakeup-rtt-timer; }; diff --git a/arch/arm/boot/dts/at91sam9261.dtsi b/arch/arm/boot/dts/at91sam9261.dtsi index ee0bd1aceb3f0..528ffc6f6f962 100644 --- a/arch/arm/boot/dts/at91sam9261.dtsi +++ b/arch/arm/boot/dts/at91sam9261.dtsi @@ -599,7 +599,7 @@ }; }; - pmc: pmc@fffffc00 { + pmc: clock-controller@fffffc00 { compatible = "atmel,at91sam9261-pmc", "syscon"; reg = <0xfffffc00 0x100>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; @@ -614,7 +614,7 @@ clocks = <&slow_xtal>; }; - shdwc@fffffd10 { + poweroff@fffffd10 { compatible = "atmel,at91sam9260-shdwc"; reg = <0xfffffd10 0x10>; clocks = <&slow_xtal>; diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi index 3ce9ea9873129..75d8ff2d12c8a 100644 --- a/arch/arm/boot/dts/at91sam9263.dtsi +++ b/arch/arm/boot/dts/at91sam9263.dtsi @@ -101,7 +101,7 @@ atmel,external-irqs = <30 31>; }; - pmc: pmc@fffffc00 { + pmc: clock-controller@fffffc00 { compatible = "atmel,at91sam9263-pmc", "syscon"; reg = <0xfffffc00 0x100>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; @@ -158,7 +158,7 @@ clocks = <&slow_xtal>; }; - shdwc@fffffd10 { + poweroff@fffffd10 { compatible = "atmel,at91sam9260-shdwc"; reg = <0xfffffd10 0x10>; clocks = <&slow_xtal>; diff --git a/arch/arm/boot/dts/at91sam9g20.dtsi b/arch/arm/boot/dts/at91sam9g20.dtsi index 708e1646b7f46..738a43ffd2281 100644 --- a/arch/arm/boot/dts/at91sam9g20.dtsi +++ b/arch/arm/boot/dts/at91sam9g20.dtsi @@ -41,7 +41,7 @@ atmel,adc-startup-time = <40>; }; - pmc: pmc@fffffc00 { + pmc: clock-controller@fffffc00 { compatible = "atmel,at91sam9g20-pmc", "atmel,at91sam9260-pmc", "syscon"; }; }; diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi index 024af2db638eb..565b99e79c520 100644 --- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi +++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi @@ -126,7 +126,7 @@ }; }; - shdwc@fffffd10 { + shdwc: poweroff@fffffd10 { atmel,wakeup-counter = <10>; atmel,wakeup-rtt-timer; }; diff --git a/arch/arm/boot/dts/at91sam9g25.dtsi b/arch/arm/boot/dts/at91sam9g25.dtsi index d2f13afb35eaf..ec3c77221881c 100644 --- a/arch/arm/boot/dts/at91sam9g25.dtsi +++ b/arch/arm/boot/dts/at91sam9g25.dtsi @@ -26,7 +26,7 @@ >; }; - pmc: pmc@fffffc00 { + pmc: clock-controller@fffffc00 { compatible = "atmel,at91sam9g25-pmc", "atmel,at91sam9x5-pmc", "syscon"; }; }; diff --git a/arch/arm/boot/dts/at91sam9g35.dtsi b/arch/arm/boot/dts/at91sam9g35.dtsi index 48c2bc4a7753d..c9cfb93092ee6 100644 --- a/arch/arm/boot/dts/at91sam9g35.dtsi +++ b/arch/arm/boot/dts/at91sam9g35.dtsi @@ -25,7 +25,7 @@ >; }; - pmc: pmc@fffffc00 { + pmc: clock-controller@fffffc00 { compatible = "atmel,at91sam9g35-pmc", "atmel,at91sam9x5-pmc", "syscon"; }; }; diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index 95f5d76234dbb..7cccc606e36cd 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -129,7 +129,7 @@ reg = <0xffffea00 0x200>; }; - pmc: pmc@fffffc00 { + pmc: clock-controller@fffffc00 { compatible = "atmel,at91sam9g45-pmc", "syscon"; reg = <0xfffffc00 0x100>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; @@ -152,7 +152,7 @@ }; - shdwc@fffffd10 { + poweroff@fffffd10 { compatible = "atmel,at91sam9rl-shdwc"; reg = <0xfffffd10 0x10>; clocks = <&clk32k>; @@ -923,7 +923,7 @@ status = "disabled"; }; - clk32k: sckc@fffffd50 { + clk32k: clock-controller@fffffd50 { compatible = "atmel,at91sam9x5-sckc"; reg = <0xfffffd50 0x4>; clocks = <&slow_xtal>; diff --git a/arch/arm/boot/dts/at91sam9n12.dtsi b/arch/arm/boot/dts/at91sam9n12.dtsi index 83114d26f10d0..16a9a908985da 100644 --- a/arch/arm/boot/dts/at91sam9n12.dtsi +++ b/arch/arm/boot/dts/at91sam9n12.dtsi @@ -118,7 +118,7 @@ reg = <0xffffea00 0x200>; }; - pmc: pmc@fffffc00 { + pmc: clock-controller@fffffc00 { compatible = "atmel,at91sam9n12-pmc", "syscon"; reg = <0xfffffc00 0x200>; #clock-cells = <2>; @@ -140,7 +140,7 @@ clocks = <&pmc PMC_TYPE_CORE PMC_MCK>; }; - shdwc@fffffe10 { + poweroff@fffffe10 { compatible = "atmel,at91sam9x5-shdwc"; reg = <0xfffffe10 0x10>; clocks = <&clk32k>; diff --git a/arch/arm/boot/dts/at91sam9rl.dtsi b/arch/arm/boot/dts/at91sam9rl.dtsi index 364a2ff0a763d..3d089ffbe1626 100644 --- a/arch/arm/boot/dts/at91sam9rl.dtsi +++ b/arch/arm/boot/dts/at91sam9rl.dtsi @@ -763,7 +763,7 @@ }; }; - pmc: pmc@fffffc00 { + pmc: clock-controller@fffffc00 { compatible = "atmel,at91sam9rl-pmc", "syscon"; reg = <0xfffffc00 0x100>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; @@ -778,7 +778,7 @@ clocks = <&clk32k>; }; - shdwc@fffffd10 { + poweroff@fffffd10 { compatible = "atmel,at91sam9260-shdwc"; reg = <0xfffffd10 0x10>; clocks = <&clk32k>; @@ -799,7 +799,7 @@ status = "disabled"; }; - clk32k: sckc@fffffd50 { + clk32k: clock-controller@fffffd50 { compatible = "atmel,at91sam9x5-sckc"; reg = <0xfffffd50 0x4>; clocks = <&slow_xtal>; diff --git a/arch/arm/boot/dts/at91sam9x25.dtsi b/arch/arm/boot/dts/at91sam9x25.dtsi index 0fe8802e1242b..7036f5f045715 100644 --- a/arch/arm/boot/dts/at91sam9x25.dtsi +++ b/arch/arm/boot/dts/at91sam9x25.dtsi @@ -27,7 +27,7 @@ >; }; - pmc: pmc@fffffc00 { + pmc: clock-controller@fffffc00 { compatible = "atmel,at91sam9x25-pmc", "atmel,at91sam9x5-pmc", "syscon"; }; }; diff --git a/arch/arm/boot/dts/at91sam9x35.dtsi b/arch/arm/boot/dts/at91sam9x35.dtsi index 0bfa21f18f870..eb03b0497e371 100644 --- a/arch/arm/boot/dts/at91sam9x35.dtsi +++ b/arch/arm/boot/dts/at91sam9x35.dtsi @@ -26,7 +26,7 @@ >; }; - pmc: pmc@fffffc00 { + pmc: clock-controller@fffffc00 { compatible = "atmel,at91sam9x35-pmc", "atmel,at91sam9x5-pmc", "syscon"; }; }; diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index 0c26c925761b2..a1fed912f2eea 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -126,7 +126,7 @@ reg = <0xffffea00 0x200>; }; - pmc: pmc@fffffc00 { + pmc: clock-controller@fffffc00 { compatible = "atmel,at91sam9x5-pmc", "syscon"; reg = <0xfffffc00 0x200>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; @@ -141,7 +141,7 @@ clocks = <&clk32k>; }; - shutdown_controller: shdwc@fffffe10 { + shutdown_controller: poweroff@fffffe10 { compatible = "atmel,at91sam9x5-shdwc"; reg = <0xfffffe10 0x10>; clocks = <&clk32k>; @@ -154,7 +154,7 @@ clocks = <&pmc PMC_TYPE_CORE PMC_MCK>; }; - clk32k: sckc@fffffe50 { + clk32k: clock-controller@fffffe50 { compatible = "atmel,at91sam9x5-sckc"; reg = <0xfffffe50 0x4>; clocks = <&slow_xtal>; diff --git a/arch/arm/boot/dts/imx53-sk-imx53.dts b/arch/arm/boot/dts/imx53-sk-imx53.dts index 103e73176e47d..1a00d290092ad 100644 --- a/arch/arm/boot/dts/imx53-sk-imx53.dts +++ b/arch/arm/boot/dts/imx53-sk-imx53.dts @@ -60,6 +60,16 @@ status = "okay"; }; +&cpu0 { + /* CPU rated to 800 MHz, not the default 1.2GHz. */ + operating-points = < + /* kHz uV */ + 166666 850000 + 400000 900000 + 800000 1050000 + >; +}; + &ecspi1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_ecspi1>; diff --git a/arch/arm/boot/dts/imx6sll.dtsi b/arch/arm/boot/dts/imx6sll.dtsi index 2873369a57c02..3659fd5ecfa62 100644 --- a/arch/arm/boot/dts/imx6sll.dtsi +++ b/arch/arm/boot/dts/imx6sll.dtsi @@ -552,7 +552,7 @@ reg = <0x020ca000 0x1000>; interrupts = ; clocks = <&clks IMX6SLL_CLK_USBPHY2>; - phy-reg_3p0-supply = <®_3p0>; + phy-3p0-supply = <®_3p0>; fsl,anatop = <&anatop>; }; diff --git a/arch/arm/boot/dts/sam9x60.dtsi b/arch/arm/boot/dts/sam9x60.dtsi index e67ede940071f..73d570a172690 100644 --- a/arch/arm/boot/dts/sam9x60.dtsi +++ b/arch/arm/boot/dts/sam9x60.dtsi @@ -172,7 +172,7 @@ status = "disabled"; uart4: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <13 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -240,7 +240,7 @@ status = "disabled"; uart5: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; atmel,usart-mode = ; interrupts = <14 IRQ_TYPE_LEVEL_HIGH 7>; @@ -370,7 +370,7 @@ status = "disabled"; uart11: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <32 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -419,7 +419,7 @@ status = "disabled"; uart12: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <33 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -576,7 +576,7 @@ status = "disabled"; uart6: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <9 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -625,7 +625,7 @@ status = "disabled"; uart7: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <10 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -674,7 +674,7 @@ status = "disabled"; uart8: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <11 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -723,7 +723,7 @@ status = "disabled"; uart0: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <5 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -791,7 +791,7 @@ status = "disabled"; uart1: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <6 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -859,7 +859,7 @@ status = "disabled"; uart2: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <7 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -927,7 +927,7 @@ status = "disabled"; uart3: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <8 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -1050,7 +1050,7 @@ status = "disabled"; uart9: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <15 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -1099,7 +1099,7 @@ status = "disabled"; uart10: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <16 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -1282,7 +1282,7 @@ }; }; - pmc: pmc@fffffc00 { + pmc: clock-controller@fffffc00 { compatible = "microchip,sam9x60-pmc", "syscon"; reg = <0xfffffc00 0x200>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; @@ -1297,7 +1297,7 @@ clocks = <&clk32k 0>; }; - shutdown_controller: shdwc@fffffe10 { + shutdown_controller: poweroff@fffffe10 { compatible = "microchip,sam9x60-shdwc"; reg = <0xfffffe10 0x10>; clocks = <&clk32k 0>; @@ -1322,7 +1322,7 @@ clocks = <&pmc PMC_TYPE_CORE PMC_MCK>; }; - clk32k: sckc@fffffe50 { + clk32k: clock-controller@fffffe50 { compatible = "microchip,sam9x60-sckc"; reg = <0xfffffe50 0x4>; clocks = <&slow_xtal>; diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 14c35c12a115f..8ae270fabfa82 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -284,7 +284,7 @@ clock-names = "dma_clk"; }; - pmc: pmc@f0014000 { + pmc: clock-controller@f0014000 { compatible = "atmel,sama5d2-pmc", "syscon"; reg = <0xf0014000 0x160>; interrupts = <74 IRQ_TYPE_LEVEL_HIGH 7>; @@ -680,7 +680,7 @@ clocks = <&clk32k>; }; - shutdown_controller: shdwc@f8048010 { + shutdown_controller: poweroff@f8048010 { compatible = "atmel,sama5d2-shdwc"; reg = <0xf8048010 0x10>; clocks = <&clk32k>; @@ -704,7 +704,7 @@ status = "disabled"; }; - clk32k: sckc@f8048050 { + clk32k: clock-controller@f8048050 { compatible = "atmel,sama5d4-sckc"; reg = <0xf8048050 0x4>; diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi index bde8e92d60bb1..d9e66700d1c20 100644 --- a/arch/arm/boot/dts/sama5d3.dtsi +++ b/arch/arm/boot/dts/sama5d3.dtsi @@ -1001,7 +1001,7 @@ }; }; - pmc: pmc@fffffc00 { + pmc: clock-controller@fffffc00 { compatible = "atmel,sama5d3-pmc", "syscon"; reg = <0xfffffc00 0x120>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; @@ -1016,7 +1016,7 @@ clocks = <&clk32k>; }; - shutdown_controller: shutdown-controller@fffffe10 { + shutdown_controller: poweroff@fffffe10 { compatible = "atmel,at91sam9x5-shdwc"; reg = <0xfffffe10 0x10>; clocks = <&clk32k>; @@ -1040,7 +1040,7 @@ status = "disabled"; }; - clk32k: sckc@fffffe50 { + clk32k: clock-controller@fffffe50 { compatible = "atmel,sama5d3-sckc"; reg = <0xfffffe50 0x4>; clocks = <&slow_xtal>; diff --git a/arch/arm/boot/dts/sama5d3_emac.dtsi b/arch/arm/boot/dts/sama5d3_emac.dtsi index 45226108850d2..5d7ce13de8ccf 100644 --- a/arch/arm/boot/dts/sama5d3_emac.dtsi +++ b/arch/arm/boot/dts/sama5d3_emac.dtsi @@ -30,7 +30,7 @@ }; }; - pmc: pmc@fffffc00 { + pmc: clock-controller@fffffc00 { }; macb1: ethernet@f802c000 { diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi index af62157ae214f..41284e013f531 100644 --- a/arch/arm/boot/dts/sama5d4.dtsi +++ b/arch/arm/boot/dts/sama5d4.dtsi @@ -250,7 +250,7 @@ clock-names = "dma_clk"; }; - pmc: pmc@f0018000 { + pmc: clock-controller@f0018000 { compatible = "atmel,sama5d4-pmc", "syscon"; reg = <0xf0018000 0x120>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; @@ -740,7 +740,7 @@ clocks = <&clk32k>; }; - shutdown_controller: shdwc@fc068610 { + shutdown_controller: poweroff@fc068610 { compatible = "atmel,at91sam9x5-shdwc"; reg = <0xfc068610 0x10>; clocks = <&clk32k>; @@ -761,7 +761,7 @@ status = "disabled"; }; - clk32k: sckc@fc068650 { + clk32k: clock-controller@fc068650 { compatible = "atmel,sama5d4-sckc"; reg = <0xfc068650 0x4>; #clock-cells = <0>; diff --git a/arch/arm/boot/dts/sama7g5.dtsi b/arch/arm/boot/dts/sama7g5.dtsi index 929ba73702e93..9642a42d84e60 100644 --- a/arch/arm/boot/dts/sama7g5.dtsi +++ b/arch/arm/boot/dts/sama7g5.dtsi @@ -241,7 +241,7 @@ clocks = <&pmc PMC_TYPE_PERIPHERAL 11>; }; - pmc: pmc@e0018000 { + pmc: clock-controller@e0018000 { compatible = "microchip,sama7g5-pmc", "syscon"; reg = <0xe0018000 0x200>; interrupts = ; @@ -257,7 +257,7 @@ clocks = <&clk32k 0>; }; - shdwc: shdwc@e001d010 { + shdwc: poweroff@e001d010 { compatible = "microchip,sama7g5-shdwc", "syscon"; reg = <0xe001d010 0x10>; clocks = <&clk32k 0>; diff --git a/arch/arm/boot/dts/usb_a9260.dts b/arch/arm/boot/dts/usb_a9260.dts index 6cfa83921ac26..66f8da89007db 100644 --- a/arch/arm/boot/dts/usb_a9260.dts +++ b/arch/arm/boot/dts/usb_a9260.dts @@ -22,7 +22,7 @@ ahb { apb { - shdwc@fffffd10 { + shdwc: poweroff@fffffd10 { atmel,wakeup-counter = <10>; atmel,wakeup-rtt-timer; }; diff --git a/arch/arm/boot/dts/usb_a9263.dts b/arch/arm/boot/dts/usb_a9263.dts index b6cb9cdf81973..45745915b2e16 100644 --- a/arch/arm/boot/dts/usb_a9263.dts +++ b/arch/arm/boot/dts/usb_a9263.dts @@ -67,7 +67,7 @@ }; }; - shdwc@fffffd10 { + poweroff@fffffd10 { atmel,wakeup-counter = <10>; atmel,wakeup-rtt-timer; }; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts index 38ae674f2f02a..3037f58057c9f 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts @@ -145,7 +145,7 @@ status = "okay"; clock-frequency = <100000>; i2c-sda-falling-time-ns = <890>; /* hcnt */ - i2c-sdl-falling-time-ns = <890>; /* lcnt */ + i2c-scl-falling-time-ns = <890>; /* lcnt */ pinctrl-names = "default", "gpio"; pinctrl-0 = <&i2c1_pmx_func>; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts index ede99dcc05580..f4cf30bac5574 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts @@ -141,7 +141,7 @@ status = "okay"; clock-frequency = <100000>; i2c-sda-falling-time-ns = <890>; /* hcnt */ - i2c-sdl-falling-time-ns = <890>; /* lcnt */ + i2c-scl-falling-time-ns = <890>; /* lcnt */ adc@14 { compatible = "lltc,ltc2497"; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts b/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts index 03e7679217b24..479948f8a4b75 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts @@ -141,7 +141,7 @@ }; &gpio1 { - gpio-line-names = "nINT_ETHPHY", "LED_RED", "WDOG_INT", "X_RTC_INT", + gpio-line-names = "", "LED_RED", "WDOG_INT", "X_RTC_INT", "", "", "", "RESET_ETHPHY", "CAN_nINT", "CAN_EN", "nENABLE_FLATLINK", "", "USB_OTG_VBUS_EN", "", "LED_GREEN", "LED_BLUE"; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi index 92616bc4f71f5..847f08537b48a 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi @@ -111,7 +111,7 @@ }; &gpio1 { - gpio-line-names = "nINT_ETHPHY", "", "WDOG_INT", "X_RTC_INT", + gpio-line-names = "", "", "WDOG_INT", "X_RTC_INT", "", "", "", "RESET_ETHPHY", "", "", "nENABLE_FLATLINK"; }; @@ -210,7 +210,7 @@ }; }; - reg_vdd_gpu: buck3 { + reg_vdd_vpu: buck3 { regulator-always-on; regulator-boot-on; regulator-max-microvolt = <1000000>; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts index 363020a08c9b8..4660d086cb099 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts @@ -567,6 +567,10 @@ status = "okay"; }; +&disp_blk_ctrl { + status = "disabled"; +}; + &pgc_mipi { status = "disabled"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts index 93088fa1c3b9c..d5b7168558124 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts @@ -628,6 +628,10 @@ status = "okay"; }; +&disp_blk_ctrl { + status = "disabled"; +}; + &pgc_mipi { status = "disabled"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi index cbd9d124c80d0..c9d4fb75c21d3 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi @@ -351,7 +351,7 @@ MX8MN_IOMUXC_ENET_RXC_ENET1_RGMII_RXC 0x91 MX8MN_IOMUXC_ENET_RX_CTL_ENET1_RGMII_RX_CTL 0x91 MX8MN_IOMUXC_ENET_TX_CTL_ENET1_RGMII_TX_CTL 0x1f - MX8MN_IOMUXC_GPIO1_IO09_GPIO1_IO9 0x19 + MX8MN_IOMUXC_GPIO1_IO09_GPIO1_IO9 0x159 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 0492556a10dbc..345c70c6c697a 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -770,7 +770,7 @@ <&clk IMX8MQ_SYS1_PLL_800M>, <&clk IMX8MQ_VPU_PLL>; assigned-clock-rates = <600000000>, - <600000000>, + <300000000>, <800000000>, <0>; }; diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 520b681a07bb0..75c37b1c55aaf 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -679,7 +679,7 @@ static void fpsimd_to_sve(struct task_struct *task) void *sst = task->thread.sve_state; struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; - if (!system_supports_sve()) + if (!system_supports_sve() && !system_supports_sme()) return; vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread)); @@ -705,7 +705,7 @@ static void sve_to_fpsimd(struct task_struct *task) unsigned int i; __uint128_t const *p; - if (!system_supports_sve()) + if (!system_supports_sve() && !system_supports_sme()) return; vl = thread_get_cur_vl(&task->thread); @@ -835,7 +835,8 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) void *sst = task->thread.sve_state; struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; - if (!test_tsk_thread_flag(task, TIF_SVE)) + if (!test_tsk_thread_flag(task, TIF_SVE) && + !thread_sm_enabled(&task->thread)) return; vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread)); @@ -909,7 +910,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, */ task->thread.svcr &= ~(SVCR_SM_MASK | SVCR_ZA_MASK); - clear_thread_flag(TIF_SME); + clear_tsk_thread_flag(task, TIF_SME); free_sme = true; } } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index d7f4f0d1ae120..5b9b4305248b8 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -932,11 +932,13 @@ static int sve_set_common(struct task_struct *target, /* * Ensure target->thread.sve_state is up to date with target's * FPSIMD regs, so that a short copyin leaves trailing - * registers unmodified. Always enable SVE even if going into - * streaming mode. + * registers unmodified. Only enable SVE if we are + * configuring normal SVE, a system with streaming SVE may not + * have normal SVE. */ fpsimd_sync_to_sve(target); - set_tsk_thread_flag(target, TIF_SVE); + if (type == ARM64_VEC_SVE) + set_tsk_thread_flag(target, TIF_SVE); target->thread.fp_type = FP_STATE_SVE; BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); @@ -1180,6 +1182,8 @@ static int zt_set(struct task_struct *target, if (ret == 0) target->thread.svcr |= SVCR_ZA_MASK; + fpsimd_flush_task_state(target); + return ret; } diff --git a/arch/parisc/mm/fixmap.c b/arch/parisc/mm/fixmap.c index cc15d737fda64..ae3493dae9dc9 100644 --- a/arch/parisc/mm/fixmap.c +++ b/arch/parisc/mm/fixmap.c @@ -19,9 +19,6 @@ void notrace set_fixmap(enum fixed_addresses idx, phys_addr_t phys) pmd_t *pmd = pmd_offset(pud, vaddr); pte_t *pte; - if (pmd_none(*pmd)) - pte = pte_alloc_kernel(pmd, vaddr); - pte = pte_offset_kernel(pmd, vaddr); set_pte_at(&init_mm, vaddr, pte, __mk_pte(phys, PAGE_KERNEL_RWX)); flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE); diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index b0c43f3b0a5f8..16b3ef4b89763 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -671,6 +671,39 @@ static void __init gateway_init(void) PAGE_SIZE, PAGE_GATEWAY, 1); } +static void __init fixmap_init(void) +{ + unsigned long addr = FIXMAP_START; + unsigned long end = FIXMAP_START + FIXMAP_SIZE; + pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d = p4d_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); + pmd_t *pmd; + + BUILD_BUG_ON(FIXMAP_SIZE > PMD_SIZE); + +#if CONFIG_PGTABLE_LEVELS == 3 + if (pud_none(*pud)) { + pmd = memblock_alloc(PAGE_SIZE << PMD_TABLE_ORDER, + PAGE_SIZE << PMD_TABLE_ORDER); + if (!pmd) + panic("fixmap: pmd allocation failed.\n"); + pud_populate(NULL, pud, pmd); + } +#endif + + pmd = pmd_offset(pud, addr); + do { + pte_t *pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!pte) + panic("fixmap: pte allocation failed.\n"); + + pmd_populate_kernel(&init_mm, pmd, pte); + + addr += PAGE_SIZE; + } while (addr < end); +} + static void __init parisc_bootmem_free(void) { unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0, }; @@ -685,6 +718,7 @@ void __init paging_init(void) setup_bootmem(); pagetable_init(); gateway_init(); + fixmap_init(); flush_cache_all_local(); /* start with known state */ flush_tlb_all_local(NULL); diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h index 46c31fb8748d5..30a12d2086871 100644 --- a/arch/powerpc/include/asm/word-at-a-time.h +++ b/arch/powerpc/include/asm/word-at-a-time.h @@ -34,7 +34,7 @@ static inline long find_zero(unsigned long mask) return leading_zero_bits >> 3; } -static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) +static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) { unsigned long rhs = val | c->low_bits; *data = rhs; diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index ffb1db3868499..1f7d86de1538e 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -33,6 +33,9 @@ * and then arrange for the ftrace function to be called. */ .macro ftrace_regs_entry allregs + /* Create a minimal stack frame for representing B */ + PPC_STLU r1, -STACK_FRAME_MIN_SIZE(r1) + /* Create our stack frame + pt_regs */ PPC_STLU r1,-SWITCH_FRAME_SIZE(r1) @@ -42,7 +45,7 @@ #ifdef CONFIG_PPC64 /* Save the original return address in A's stack frame */ - std r0, LRSAVE+SWITCH_FRAME_SIZE(r1) + std r0, LRSAVE+SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE(r1) /* Ok to continue? */ lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 @@ -77,6 +80,8 @@ mflr r7 /* Save it as pt_regs->nip */ PPC_STL r7, _NIP(r1) + /* Also save it in B's stackframe header for proper unwind */ + PPC_STL r7, LRSAVE+SWITCH_FRAME_SIZE(r1) /* Save the read LR in pt_regs->link */ PPC_STL r0, _LINK(r1) @@ -142,7 +147,7 @@ #endif /* Pop our stack frame */ - addi r1, r1, SWITCH_FRAME_SIZE + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE #ifdef CONFIG_LIVEPATCH_64 /* Based on the cmpd above, if the NIP was altered handle livepatch */ diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index fe1b83020e0df..0ec5b45b1e86a 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -314,8 +314,7 @@ void __ref vmemmap_free(unsigned long start, unsigned long end, start = ALIGN_DOWN(start, page_size); if (altmap) { alt_start = altmap->base_pfn; - alt_end = altmap->base_pfn + altmap->reserve + - altmap->free + altmap->alloc + altmap->align; + alt_end = altmap->base_pfn + altmap->reserve + altmap->free; } pr_debug("vmemmap_free %lx...%lx\n", start, end); diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/crash_core.c index b351a3c013555..55f1d7856b544 100644 --- a/arch/riscv/kernel/crash_core.c +++ b/arch/riscv/kernel/crash_core.c @@ -18,4 +18,6 @@ void arch_crash_save_vmcoreinfo(void) vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); #endif vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); + vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n", + kernel_map.va_kernel_pa_offset); } diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index 4d141e2c132e5..2ea7f208f0e73 100644 --- a/arch/s390/kernel/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -459,9 +459,9 @@ static int sthyi_update_cache(u64 *rc) * * Fills the destination with system information returned by the STHYI * instruction. The data is generated by emulation or execution of STHYI, - * if available. The return value is the condition code that would be - * returned, the rc parameter is the return code which is passed in - * register R2 + 1. + * if available. The return value is either a negative error value or + * the condition code that would be returned, the rc parameter is the + * return code which is passed in register R2 + 1. */ int sthyi_fill(void *dst, u64 *rc) { diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 2cda8d9d7c6ef..f817006f9f936 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -389,8 +389,8 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) */ int handle_sthyi(struct kvm_vcpu *vcpu) { - int reg1, reg2, r = 0; - u64 code, addr, cc = 0, rc = 0; + int reg1, reg2, cc = 0, r = 0; + u64 code, addr, rc = 0; struct sthyi_sctns *sctns = NULL; if (!test_kvm_facility(vcpu->kvm, 74)) @@ -421,7 +421,10 @@ int handle_sthyi(struct kvm_vcpu *vcpu) return -ENOMEM; cc = sthyi_fill(sctns, &rc); - + if (cc < 0) { + free_page((unsigned long)sctns); + return cc; + } out: if (!cc) { if (kvm_s390_pv_cpu_is_protected(vcpu)) { diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index b9dcb4ae6c59a..05f4912380fac 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -761,6 +761,8 @@ void __init vmem_map_init(void) if (static_key_enabled(&cpu_has_bear)) set_memory_nx(0, 1); set_memory_nx(PAGE_SIZE, 1); + if (debug_pagealloc_enabled()) + set_memory_4k(0, ident_map_size >> PAGE_SHIFT); pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 6c04b52f139b5..953e280c07c38 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -471,6 +472,26 @@ void __init hyperv_init(void) wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); } + /* + * Some versions of Hyper-V that provide IBT in guest VMs have a bug + * in that there's no ENDBR64 instruction at the entry to the + * hypercall page. Because hypercalls are invoked via an indirect call + * to the hypercall page, all hypercall attempts fail when IBT is + * enabled, and Linux panics. For such buggy versions, disable IBT. + * + * Fixed versions of Hyper-V always provide ENDBR64 on the hypercall + * page, so if future Linux kernel versions enable IBT for 32-bit + * builds, additional hypercall page hackery will be required here + * to provide an ENDBR32. + */ +#ifdef CONFIG_X86_KERNEL_IBT + if (cpu_feature_enabled(X86_FEATURE_IBT) && + *(u32 *)hv_hypercall_pg != gen_endbr()) { + setup_clear_cpu_cap(X86_FEATURE_IBT); + pr_warn("Hyper-V: Disabling IBT because of Hyper-V bug\n"); + } +#endif + /* * hyperv_init() is called before LAPIC is initialized: see * apic_intr_mode_init() -> x86_platform.apic_post_init() and diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 094f88fee5369..b69b0d7756aab 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -495,4 +495,5 @@ /* BUG word 2 */ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 37f1826df2635..e8db1cff76fda 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -684,10 +684,12 @@ extern u16 get_llc_id(unsigned int cpu); extern u32 amd_get_nodes_per_socket(void); extern u32 amd_get_highest_perf(void); extern bool cpu_has_ibpb_brtype_microcode(void); +extern void amd_clear_divider(void); #else static inline u32 amd_get_nodes_per_socket(void) { return 0; } static inline u32 amd_get_highest_perf(void) { return 0; } static inline bool cpu_has_ibpb_brtype_microcode(void) { return false; } +static inline void amd_clear_divider(void) { } #endif extern unsigned long arch_align_stack(unsigned long sp); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4239b51e0bc50..c37a3a5cdabd3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -75,6 +75,10 @@ static const int amd_zenbleed[] = AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf), AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf)); +static const int amd_div0[] = + AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf), + AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf)); + static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) { int osvw_id = *erratum++; @@ -1130,6 +1134,11 @@ static void init_amd(struct cpuinfo_x86 *c) WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS)); zenbleed_check(c); + + if (cpu_has_amd_erratum(c, amd_div0)) { + pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n"); + setup_force_cpu_bug(X86_BUG_DIV0); + } } #ifdef CONFIG_X86_32 @@ -1309,3 +1318,13 @@ void amd_check_microcode(void) { on_each_cpu(zenbleed_check_cpu, NULL, 1); } + +/* + * Issue a DIV 0/1 insn to clear any division data from previous DIV + * operations. + */ +void noinstr amd_clear_divider(void) +{ + asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0) + :: "a" (0), "d" (0), "r" (1)); +} diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 4a817d20ce3bb..1885326a8f659 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -206,6 +206,8 @@ DEFINE_IDTENTRY(exc_divide_error) { do_error_trap(regs, 0, "divide error", X86_TRAP_DE, SIGFPE, FPE_INTDIV, error_get_trap_addr(regs)); + + amd_clear_divider(); } DEFINE_IDTENTRY(exc_overflow) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 5c86151b0d3a5..e8af9d8f024eb 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3675,7 +3675,7 @@ static int rbd_lock(struct rbd_device *rbd_dev) ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, RBD_LOCK_NAME, CEPH_CLS_LOCK_EXCLUSIVE, cookie, RBD_LOCK_TAG, "", 0); - if (ret) + if (ret && ret != -EEXIST) return ret; __rbd_lock(rbd_dev, cookie); @@ -3878,7 +3878,7 @@ static struct ceph_locker *get_lock_owner_info(struct rbd_device *rbd_dev) &rbd_dev->header_oloc, RBD_LOCK_NAME, &lock_type, &lock_tag, &lockers, &num_lockers); if (ret) { - rbd_warn(rbd_dev, "failed to retrieve lockers: %d", ret); + rbd_warn(rbd_dev, "failed to get header lockers: %d", ret); return ERR_PTR(ret); } @@ -3940,8 +3940,10 @@ static int find_watcher(struct rbd_device *rbd_dev, ret = ceph_osdc_list_watchers(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, &watchers, &num_watchers); - if (ret) + if (ret) { + rbd_warn(rbd_dev, "failed to get watchers: %d", ret); return ret; + } sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie); for (i = 0; i < num_watchers; i++) { @@ -3985,8 +3987,12 @@ static int rbd_try_lock(struct rbd_device *rbd_dev) locker = refreshed_locker = NULL; ret = rbd_lock(rbd_dev); - if (ret != -EBUSY) + if (!ret) + goto out; + if (ret != -EBUSY) { + rbd_warn(rbd_dev, "failed to lock header: %d", ret); goto out; + } /* determine if the current lock holder is still alive */ locker = get_lock_owner_info(rbd_dev); @@ -4089,11 +4095,8 @@ static int rbd_try_acquire_lock(struct rbd_device *rbd_dev) ret = rbd_try_lock(rbd_dev); if (ret < 0) { - rbd_warn(rbd_dev, "failed to lock header: %d", ret); - if (ret == -EBLOCKLISTED) - goto out; - - ret = 1; /* request lock anyway */ + rbd_warn(rbd_dev, "failed to acquire lock: %d", ret); + goto out; } if (ret > 0) { up_write(&rbd_dev->lock_rwsem); @@ -6627,12 +6630,11 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev) cancel_delayed_work_sync(&rbd_dev->lock_dwork); if (!ret) ret = -ETIMEDOUT; - } - if (ret) { - rbd_warn(rbd_dev, "failed to acquire exclusive lock: %ld", ret); - return ret; + rbd_warn(rbd_dev, "failed to acquire lock: %ld", ret); } + if (ret) + return ret; /* * The lock may have been released by now, unless automatic lock diff --git a/drivers/clk/imx/clk-imx93.c b/drivers/clk/imx/clk-imx93.c index b6c7c2725906c..44f435103c65a 100644 --- a/drivers/clk/imx/clk-imx93.c +++ b/drivers/clk/imx/clk-imx93.c @@ -291,7 +291,7 @@ static int imx93_clocks_probe(struct platform_device *pdev) anatop_base = devm_of_iomap(dev, np, 0, NULL); of_node_put(np); if (WARN_ON(IS_ERR(anatop_base))) { - ret = PTR_ERR(base); + ret = PTR_ERR(anatop_base); goto unregister_hws; } diff --git a/drivers/clk/mediatek/clk-mt8183.c b/drivers/clk/mediatek/clk-mt8183.c index 2336a1b69c093..3b605c30e8494 100644 --- a/drivers/clk/mediatek/clk-mt8183.c +++ b/drivers/clk/mediatek/clk-mt8183.c @@ -328,6 +328,14 @@ static const char * const atb_parents[] = { "syspll_d5" }; +static const char * const sspm_parents[] = { + "clk26m", + "univpll_d2_d4", + "syspll_d2_d2", + "univpll_d2_d2", + "syspll_d3" +}; + static const char * const dpi0_parents[] = { "clk26m", "tvdpll_d2", @@ -506,6 +514,9 @@ static const struct mtk_mux top_muxes[] = { /* CLK_CFG_6 */ MUX_GATE_CLR_SET_UPD(CLK_TOP_MUX_ATB, "atb_sel", atb_parents, 0xa0, 0xa4, 0xa8, 0, 2, 7, 0x004, 24), + MUX_GATE_CLR_SET_UPD_FLAGS(CLK_TOP_MUX_SSPM, "sspm_sel", + sspm_parents, 0xa0, 0xa4, 0xa8, 8, 3, 15, 0x004, 25, + CLK_IS_CRITICAL | CLK_SET_RATE_PARENT), MUX_GATE_CLR_SET_UPD(CLK_TOP_MUX_DPI0, "dpi0_sel", dpi0_parents, 0xa0, 0xa4, 0xa8, 16, 4, 23, 0x004, 26), MUX_GATE_CLR_SET_UPD(CLK_TOP_MUX_SCAM, "scam_sel", @@ -671,10 +682,18 @@ static const struct mtk_gate_regs infra3_cg_regs = { GATE_MTK(_id, _name, _parent, &infra2_cg_regs, _shift, \ &mtk_clk_gate_ops_setclr) +#define GATE_INFRA2_FLAGS(_id, _name, _parent, _shift, _flag) \ + GATE_MTK_FLAGS(_id, _name, _parent, &infra2_cg_regs, \ + _shift, &mtk_clk_gate_ops_setclr, _flag) + #define GATE_INFRA3(_id, _name, _parent, _shift) \ GATE_MTK(_id, _name, _parent, &infra3_cg_regs, _shift, \ &mtk_clk_gate_ops_setclr) +#define GATE_INFRA3_FLAGS(_id, _name, _parent, _shift, _flag) \ + GATE_MTK_FLAGS(_id, _name, _parent, &infra3_cg_regs, \ + _shift, &mtk_clk_gate_ops_setclr, _flag) + static const struct mtk_gate infra_clks[] = { /* INFRA0 */ GATE_INFRA0(CLK_INFRA_PMIC_TMR, "infra_pmic_tmr", "axi_sel", 0), @@ -746,7 +765,11 @@ static const struct mtk_gate infra_clks[] = { GATE_INFRA2(CLK_INFRA_UNIPRO_TICK, "infra_unipro_tick", "fufs_sel", 12), GATE_INFRA2(CLK_INFRA_UFS_MP_SAP_BCLK, "infra_ufs_mp_sap_bck", "fufs_sel", 13), GATE_INFRA2(CLK_INFRA_MD32_BCLK, "infra_md32_bclk", "axi_sel", 14), + /* infra_sspm is main clock in co-processor, should not be closed in Linux. */ + GATE_INFRA2_FLAGS(CLK_INFRA_SSPM, "infra_sspm", "sspm_sel", 15, CLK_IS_CRITICAL), GATE_INFRA2(CLK_INFRA_UNIPRO_MBIST, "infra_unipro_mbist", "axi_sel", 16), + /* infra_sspm_bus_hclk is main clock in co-processor, should not be closed in Linux. */ + GATE_INFRA2_FLAGS(CLK_INFRA_SSPM_BUS_HCLK, "infra_sspm_bus_hclk", "axi_sel", 17, CLK_IS_CRITICAL), GATE_INFRA2(CLK_INFRA_I2C5, "infra_i2c5", "i2c_sel", 18), GATE_INFRA2(CLK_INFRA_I2C5_ARBITER, "infra_i2c5_arbiter", "i2c_sel", 19), GATE_INFRA2(CLK_INFRA_I2C5_IMM, "infra_i2c5_imm", "i2c_sel", 20), @@ -764,6 +787,10 @@ static const struct mtk_gate infra_clks[] = { GATE_INFRA3(CLK_INFRA_MSDC0_SELF, "infra_msdc0_self", "msdc50_0_sel", 0), GATE_INFRA3(CLK_INFRA_MSDC1_SELF, "infra_msdc1_self", "msdc50_0_sel", 1), GATE_INFRA3(CLK_INFRA_MSDC2_SELF, "infra_msdc2_self", "msdc50_0_sel", 2), + /* infra_sspm_26m_self is main clock in co-processor, should not be closed in Linux. */ + GATE_INFRA3_FLAGS(CLK_INFRA_SSPM_26M_SELF, "infra_sspm_26m_self", "f_f26m_ck", 3, CLK_IS_CRITICAL), + /* infra_sspm_32k_self is main clock in co-processor, should not be closed in Linux. */ + GATE_INFRA3_FLAGS(CLK_INFRA_SSPM_32K_SELF, "infra_sspm_32k_self", "f_f26m_ck", 4, CLK_IS_CRITICAL), GATE_INFRA3(CLK_INFRA_UFS_AXI, "infra_ufs_axi", "axi_sel", 5), GATE_INFRA3(CLK_INFRA_I2C6, "infra_i2c6", "i2c_sel", 6), GATE_INFRA3(CLK_INFRA_AP_MSDC0, "infra_ap_msdc0", "msdc50_hclk_sel", 7), diff --git a/drivers/firmware/arm_scmi/mailbox.c b/drivers/firmware/arm_scmi/mailbox.c index 1efa5e9392c42..19246ed1f01ff 100644 --- a/drivers/firmware/arm_scmi/mailbox.c +++ b/drivers/firmware/arm_scmi/mailbox.c @@ -166,8 +166,10 @@ static int mailbox_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, return -ENOMEM; shmem = of_parse_phandle(cdev->of_node, "shmem", idx); - if (!of_device_is_compatible(shmem, "arm,scmi-shmem")) + if (!of_device_is_compatible(shmem, "arm,scmi-shmem")) { + of_node_put(shmem); return -ENXIO; + } ret = of_address_to_resource(shmem, 0, &res); of_node_put(shmem); diff --git a/drivers/firmware/arm_scmi/raw_mode.c b/drivers/firmware/arm_scmi/raw_mode.c index 6971dcf72fb99..0493aa3c12bf5 100644 --- a/drivers/firmware/arm_scmi/raw_mode.c +++ b/drivers/firmware/arm_scmi/raw_mode.c @@ -818,10 +818,13 @@ static ssize_t scmi_dbg_raw_mode_common_write(struct file *filp, * before sending it with a single RAW xfer. */ if (rd->tx_size < rd->tx_req_size) { - size_t cnt; + ssize_t cnt; cnt = simple_write_to_buffer(rd->tx.buf, rd->tx.len, ppos, buf, count); + if (cnt < 0) + return cnt; + rd->tx_size += cnt; if (cnt < count) return cnt; diff --git a/drivers/firmware/arm_scmi/smc.c b/drivers/firmware/arm_scmi/smc.c index 93272e4bbd12b..9ba0aab8ce22d 100644 --- a/drivers/firmware/arm_scmi/smc.c +++ b/drivers/firmware/arm_scmi/smc.c @@ -23,6 +23,7 @@ /** * struct scmi_smc - Structure representing a SCMI smc transport * + * @irq: An optional IRQ for completion * @cinfo: SCMI channel info * @shmem: Transmit/Receive shared memory area * @shmem_lock: Lock to protect access to Tx/Rx shared memory area. @@ -33,6 +34,7 @@ */ struct scmi_smc { + int irq; struct scmi_chan_info *cinfo; struct scmi_shared_mem __iomem *shmem; /* Protect access to shmem area */ @@ -106,7 +108,7 @@ static int smc_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, struct resource res; struct device_node *np; u32 func_id; - int ret, irq; + int ret; if (!tx) return -ENODEV; @@ -116,8 +118,10 @@ static int smc_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, return -ENOMEM; np = of_parse_phandle(cdev->of_node, "shmem", 0); - if (!of_device_is_compatible(np, "arm,scmi-shmem")) + if (!of_device_is_compatible(np, "arm,scmi-shmem")) { + of_node_put(np); return -ENXIO; + } ret = of_address_to_resource(np, 0, &res); of_node_put(np); @@ -142,11 +146,10 @@ static int smc_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, * completion of a message is signaled by an interrupt rather than by * the return of the SMC call. */ - irq = of_irq_get_byname(cdev->of_node, "a2p"); - if (irq > 0) { - ret = devm_request_irq(dev, irq, smc_msg_done_isr, - IRQF_NO_SUSPEND, - dev_name(dev), scmi_info); + scmi_info->irq = of_irq_get_byname(cdev->of_node, "a2p"); + if (scmi_info->irq > 0) { + ret = request_irq(scmi_info->irq, smc_msg_done_isr, + IRQF_NO_SUSPEND, dev_name(dev), scmi_info); if (ret) { dev_err(dev, "failed to setup SCMI smc irq\n"); return ret; @@ -168,6 +171,10 @@ static int smc_chan_free(int id, void *p, void *data) struct scmi_chan_info *cinfo = p; struct scmi_smc *scmi_info = cinfo->transport_info; + /* Ignore any possible further reception on the IRQ path */ + if (scmi_info->irq > 0) + free_irq(scmi_info->irq, scmi_info); + cinfo->transport_info = NULL; scmi_info->cinfo = NULL; diff --git a/drivers/firmware/smccc/soc_id.c b/drivers/firmware/smccc/soc_id.c index 890eb454599a3..1990263fbba0e 100644 --- a/drivers/firmware/smccc/soc_id.c +++ b/drivers/firmware/smccc/soc_id.c @@ -34,7 +34,6 @@ static struct soc_device_attribute *soc_dev_attr; static int __init smccc_soc_init(void) { - struct arm_smccc_res res; int soc_id_rev, soc_id_version; static char soc_id_str[20], soc_id_rev_str[12]; static char soc_id_jep106_id_str[12]; @@ -49,13 +48,13 @@ static int __init smccc_soc_init(void) } if (soc_id_version < 0) { - pr_err("ARCH_SOC_ID(0) returned error: %lx\n", res.a0); + pr_err("Invalid SoC Version: %x\n", soc_id_version); return -EINVAL; } soc_id_rev = arm_smccc_get_soc_id_revision(); if (soc_id_rev < 0) { - pr_err("ARCH_SOC_ID(1) returned error: %lx\n", res.a0); + pr_err("Invalid SoC Revision: %x\n", soc_id_rev); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 2cd081cbf7062..59ffb9389c697 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1623,14 +1623,15 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev) return 0; } -static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev) +static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev, + uint32_t reserve_size) { struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; memset(ctx, 0, sizeof(*ctx)); ctx->c2p_train_data_offset = - ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M); + ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M); ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); ctx->train_data_size = @@ -1648,9 +1649,10 @@ static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev) */ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) { - int ret; struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; bool mem_train_support = false; + uint32_t reserve_size = 0; + int ret; if (!amdgpu_sriov_vf(adev)) { if (amdgpu_atomfirmware_mem_training_supported(adev)) @@ -1666,14 +1668,15 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) * Otherwise, fallback to legacy approach to check and reserve tmr block for ip * discovery data and G6 memory training data respectively */ - adev->mman.discovery_tmr_size = - amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); - if (!adev->mman.discovery_tmr_size) - adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET; + if (adev->bios) + reserve_size = + amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); + if (!reserve_size) + reserve_size = DISCOVERY_TMR_OFFSET; if (mem_train_support) { /* reserve vram for mem train according to TMR location */ - amdgpu_ttm_training_data_block_init(adev); + amdgpu_ttm_training_data_block_init(adev, reserve_size); ret = amdgpu_bo_create_kernel_at(adev, ctx->c2p_train_data_offset, ctx->train_data_size, @@ -1687,14 +1690,13 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; } - ret = amdgpu_bo_create_kernel_at(adev, - adev->gmc.real_vram_size - adev->mman.discovery_tmr_size, - adev->mman.discovery_tmr_size, - &adev->mman.discovery_memory, - NULL); + ret = amdgpu_bo_create_kernel_at( + adev, adev->gmc.real_vram_size - reserve_size, + reserve_size, &adev->mman.fw_reserved_memory, NULL); if (ret) { DRM_ERROR("alloc tmr failed(%d)!\n", ret); - amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, + NULL, NULL); return ret; } @@ -1881,8 +1883,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) /* return the stolen vga memory back to VRAM */ amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); - /* return the IP Discovery TMR memory back to VRAM */ - amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); + /* return the FW reserved memory back to VRAM */ + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, + NULL); if (adev->mman.stolen_reserved_size) amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, NULL, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index e2cd5894afc9d..da6544fdc8ddd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -78,7 +78,8 @@ struct amdgpu_mman { /* discovery */ uint8_t *discovery_bin; uint32_t discovery_tmr_size; - struct amdgpu_bo *discovery_memory; + /* fw reserved memory */ + struct amdgpu_bo *fw_reserved_memory; /* firmware VRAM reservation */ u64 fw_vram_usage_start_offset; diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index e1c76e5bfa827..2702ad4c26c88 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -165,58 +165,148 @@ static u32 preparser_disable(bool state) return MI_ARB_CHECK | 1 << 8 | state; } -u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg) +static i915_reg_t gen12_get_aux_inv_reg(struct intel_engine_cs *engine) { - u32 gsi_offset = gt->uncore->gsi_offset; + switch (engine->id) { + case RCS0: + return GEN12_CCS_AUX_INV; + case BCS0: + return GEN12_BCS0_AUX_INV; + case VCS0: + return GEN12_VD0_AUX_INV; + case VCS2: + return GEN12_VD2_AUX_INV; + case VECS0: + return GEN12_VE0_AUX_INV; + case CCS0: + return GEN12_CCS0_AUX_INV; + default: + return INVALID_MMIO_REG; + } +} + +static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine) +{ + i915_reg_t reg = gen12_get_aux_inv_reg(engine); + + if (IS_PONTEVECCHIO(engine->i915)) + return false; + + /* + * So far platforms supported by i915 having flat ccs do not require + * AUX invalidation. Check also whether the engine requires it. + */ + return i915_mmio_reg_valid(reg) && !HAS_FLAT_CCS(engine->i915); +} + +u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs) +{ + i915_reg_t inv_reg = gen12_get_aux_inv_reg(engine); + u32 gsi_offset = engine->gt->uncore->gsi_offset; + + if (!gen12_needs_ccs_aux_inv(engine)) + return cs; *cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN; *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset; *cs++ = AUX_INV; - *cs++ = MI_NOOP; + + *cs++ = MI_SEMAPHORE_WAIT_TOKEN | + MI_SEMAPHORE_REGISTER_POLL | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset; + *cs++ = 0; + *cs++ = 0; return cs; } +static int mtl_dummy_pipe_control(struct i915_request *rq) +{ + /* Wa_14016712196 */ + if (IS_MTL_GRAPHICS_STEP(rq->engine->i915, M, STEP_A0, STEP_B0) || + IS_MTL_GRAPHICS_STEP(rq->engine->i915, P, STEP_A0, STEP_B0)) { + u32 *cs; + + /* dummy PIPE_CONTROL + depth flush */ + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + cs = gen12_emit_pipe_control(cs, + 0, + PIPE_CONTROL_DEPTH_CACHE_FLUSH, + LRC_PPHWSP_SCRATCH_ADDR); + intel_ring_advance(rq, cs); + } + + return 0; +} + int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) { struct intel_engine_cs *engine = rq->engine; - if (mode & EMIT_FLUSH) { - u32 flags = 0; + /* + * On Aux CCS platforms the invalidation of the Aux + * table requires quiescing memory traffic beforehand + */ + if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) { + u32 bit_group_0 = 0; + u32 bit_group_1 = 0; + int err; u32 *cs; - flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; - flags |= PIPE_CONTROL_FLUSH_L3; - flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; - flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + err = mtl_dummy_pipe_control(rq); + if (err) + return err; + + bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH; + + /* + * When required, in MTL and beyond platforms we + * need to set the CCS_FLUSH bit in the pipe control + */ + if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70)) + bit_group_0 |= PIPE_CONTROL_CCS_FLUSH; + + bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH; + bit_group_1 |= PIPE_CONTROL_FLUSH_L3; + bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; /* Wa_1409600907:tgl,adl-p */ - flags |= PIPE_CONTROL_DEPTH_STALL; - flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; - flags |= PIPE_CONTROL_FLUSH_ENABLE; + bit_group_1 |= PIPE_CONTROL_DEPTH_STALL; + bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE; + bit_group_1 |= PIPE_CONTROL_FLUSH_ENABLE; - flags |= PIPE_CONTROL_STORE_DATA_INDEX; - flags |= PIPE_CONTROL_QW_WRITE; + bit_group_1 |= PIPE_CONTROL_STORE_DATA_INDEX; + bit_group_1 |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_CS_STALL; + bit_group_1 |= PIPE_CONTROL_CS_STALL; if (!HAS_3D_PIPELINE(engine->i915)) - flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS; + bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS; else if (engine->class == COMPUTE_CLASS) - flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; + bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); - cs = gen12_emit_pipe_control(cs, - PIPE_CONTROL0_HDC_PIPELINE_FLUSH, - flags, LRC_PPHWSP_SCRATCH_ADDR); + cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1, + LRC_PPHWSP_SCRATCH_ADDR); intel_ring_advance(rq, cs); } if (mode & EMIT_INVALIDATE) { u32 flags = 0; u32 *cs, count; + int err; + + err = mtl_dummy_pipe_control(rq); + if (err) + return err; flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; flags |= PIPE_CONTROL_TLB_INVALIDATE; @@ -236,10 +326,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) else if (engine->class == COMPUTE_CLASS) flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; - if (!HAS_FLAT_CCS(rq->engine->i915)) - count = 8 + 4; - else - count = 8; + count = 8; + if (gen12_needs_ccs_aux_inv(rq->engine)) + count += 8; cs = intel_ring_begin(rq, count); if (IS_ERR(cs)) @@ -254,11 +343,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); - if (!HAS_FLAT_CCS(rq->engine->i915)) { - /* hsdes: 1809175790 */ - cs = gen12_emit_aux_table_inv(rq->engine->gt, - cs, GEN12_GFX_CCS_AUX_NV); - } + cs = gen12_emit_aux_table_inv(engine, cs); *cs++ = preparser_disable(false); intel_ring_advance(rq, cs); @@ -269,21 +354,14 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) { - intel_engine_mask_t aux_inv = 0; - u32 cmd, *cs; + u32 cmd = 4; + u32 *cs; - cmd = 4; if (mode & EMIT_INVALIDATE) { cmd += 2; - if (!HAS_FLAT_CCS(rq->engine->i915) && - (rq->engine->class == VIDEO_DECODE_CLASS || - rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) { - aux_inv = rq->engine->mask & - ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0); - if (aux_inv) - cmd += 4; - } + if (gen12_needs_ccs_aux_inv(rq->engine)) + cmd += 8; } cs = intel_ring_begin(rq, cmd); @@ -307,6 +385,10 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) cmd |= MI_INVALIDATE_TLB; if (rq->engine->class == VIDEO_DECODE_CLASS) cmd |= MI_INVALIDATE_BSD; + + if (gen12_needs_ccs_aux_inv(rq->engine) && + rq->engine->class == COPY_ENGINE_CLASS) + cmd |= MI_FLUSH_DW_CCS; } *cs++ = cmd; @@ -314,14 +396,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) *cs++ = 0; /* upper addr */ *cs++ = 0; /* value */ - if (aux_inv) { /* hsdes: 1809175790 */ - if (rq->engine->class == VIDEO_DECODE_CLASS) - cs = gen12_emit_aux_table_inv(rq->engine->gt, - cs, GEN12_VD0_AUX_NV); - else - cs = gen12_emit_aux_table_inv(rq->engine->gt, - cs, GEN12_VE0_AUX_NV); - } + cs = gen12_emit_aux_table_inv(rq->engine, cs); if (mode & EMIT_INVALIDATE) *cs++ = preparser_disable(false); @@ -733,6 +808,13 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE); + /* Wa_14016712196 */ + if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || + IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + /* dummy PIPE_CONTROL + depth flush */ + cs = gen12_emit_pipe_control(cs, 0, + PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0); + if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) /* Wa_1409600907 */ flags |= PIPE_CONTROL_DEPTH_STALL; diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h index 655e5c00ddc27..867ba697aceb8 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h @@ -13,6 +13,7 @@ #include "intel_gt_regs.h" #include "intel_gpu_commands.h" +struct intel_engine_cs; struct intel_gt; struct i915_request; @@ -46,28 +47,32 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); -u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg); +u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs); static inline u32 * -__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) +__gen8_emit_pipe_control(u32 *batch, u32 bit_group_0, + u32 bit_group_1, u32 offset) { memset(batch, 0, 6 * sizeof(u32)); - batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0; - batch[1] = flags1; + batch[0] = GFX_OP_PIPE_CONTROL(6) | bit_group_0; + batch[1] = bit_group_1; batch[2] = offset; return batch + 6; } -static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) +static inline u32 *gen8_emit_pipe_control(u32 *batch, + u32 bit_group_1, u32 offset) { - return __gen8_emit_pipe_control(batch, 0, flags, offset); + return __gen8_emit_pipe_control(batch, 0, bit_group_1, offset); } -static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) +static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 bit_group_0, + u32 bit_group_1, u32 offset) { - return __gen8_emit_pipe_control(batch, flags0, flags1, offset); + return __gen8_emit_pipe_control(batch, bit_group_0, + bit_group_1, offset); } static inline u32 * diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 5d143e2a8db03..2bd8d98d21102 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -121,6 +121,7 @@ #define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ #define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */ +#define MI_SEMAPHORE_REGISTER_POLL (1 << 16) #define MI_SEMAPHORE_POLL (1 << 15) #define MI_SEMAPHORE_SAD_GT_SDD (0 << 12) #define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12) @@ -299,6 +300,7 @@ #define PIPE_CONTROL_QW_WRITE (1<<14) #define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14) #define PIPE_CONTROL_DEPTH_STALL (1<<13) +#define PIPE_CONTROL_CCS_FLUSH (1<<13) /* MTL+ */ #define PIPE_CONTROL_WRITE_FLUSH (1<<12) #define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */ #define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index fd1f9cd35e9d7..b8b7992e72537 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -331,9 +331,11 @@ #define GEN8_PRIVATE_PAT_HI _MMIO(0x40e0 + 4) #define GEN10_PAT_INDEX(index) _MMIO(0x40e0 + (index) * 4) #define BSD_HWS_PGA_GEN7 _MMIO(0x4180) -#define GEN12_GFX_CCS_AUX_NV _MMIO(0x4208) -#define GEN12_VD0_AUX_NV _MMIO(0x4218) -#define GEN12_VD1_AUX_NV _MMIO(0x4228) + +#define GEN12_CCS_AUX_INV _MMIO(0x4208) +#define GEN12_VD0_AUX_INV _MMIO(0x4218) +#define GEN12_VE0_AUX_INV _MMIO(0x4238) +#define GEN12_BCS0_AUX_INV _MMIO(0x4248) #define GEN8_RTCR _MMIO(0x4260) #define GEN8_M1TCR _MMIO(0x4264) @@ -341,14 +343,12 @@ #define GEN8_BTCR _MMIO(0x426c) #define GEN8_VTCR _MMIO(0x4270) -#define GEN12_VD2_AUX_NV _MMIO(0x4298) -#define GEN12_VD3_AUX_NV _MMIO(0x42a8) -#define GEN12_VE0_AUX_NV _MMIO(0x4238) - #define BLT_HWS_PGA_GEN7 _MMIO(0x4280) -#define GEN12_VE1_AUX_NV _MMIO(0x42b8) +#define GEN12_VD2_AUX_INV _MMIO(0x4298) +#define GEN12_CCS0_AUX_INV _MMIO(0x42c8) #define AUX_INV REG_BIT(0) + #define VEBOX_HWS_PGA_GEN7 _MMIO(0x4380) #define GEN12_AUX_ERR_DBG _MMIO(0x43f4) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 81a96c52a92b3..502a1c0093aab 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1364,10 +1364,7 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) IS_DG2_G11(ce->engine->i915)) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); - /* hsdes: 1809175790 */ - if (!HAS_FLAT_CCS(ce->engine->i915)) - cs = gen12_emit_aux_table_inv(ce->engine->gt, - cs, GEN12_GFX_CCS_AUX_NV); + cs = gen12_emit_aux_table_inv(ce->engine, cs); /* Wa_16014892111 */ if (IS_DG2(ce->engine->i915)) @@ -1390,17 +1387,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); - /* hsdes: 1809175790 */ - if (!HAS_FLAT_CCS(ce->engine->i915)) { - if (ce->engine->class == VIDEO_DECODE_CLASS) - cs = gen12_emit_aux_table_inv(ce->engine->gt, - cs, GEN12_VD0_AUX_NV); - else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS) - cs = gen12_emit_aux_table_inv(ce->engine->gt, - cs, GEN12_VE0_AUX_NV); - } - - return cs; + return gen12_emit_aux_table_inv(ce->engine, cs); } static void diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 8ef93889061a6..5ec293011d990 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -449,8 +449,11 @@ int i915_active_add_request(struct i915_active *ref, struct i915_request *rq) } } while (unlikely(is_barrier(active))); - if (!__i915_active_fence_set(active, fence)) + fence = __i915_active_fence_set(active, fence); + if (!fence) __i915_active_acquire(ref); + else + dma_fence_put(fence); out: i915_active_release(ref); @@ -469,13 +472,9 @@ __i915_active_set_fence(struct i915_active *ref, return NULL; } - rcu_read_lock(); prev = __i915_active_fence_set(active, fence); - if (prev) - prev = dma_fence_get_rcu(prev); - else + if (!prev) __i915_active_acquire(ref); - rcu_read_unlock(); return prev; } @@ -1019,10 +1018,11 @@ void i915_request_add_active_barriers(struct i915_request *rq) * * Records the new @fence as the last active fence along its timeline in * this active tracker, moving the tracking callbacks from the previous - * fence onto this one. Returns the previous fence (if not already completed), - * which the caller must ensure is executed before the new fence. To ensure - * that the order of fences within the timeline of the i915_active_fence is - * understood, it should be locked by the caller. + * fence onto this one. Gets and returns a reference to the previous fence + * (if not already completed), which the caller must put after making sure + * that it is executed before the new fence. To ensure that the order of + * fences within the timeline of the i915_active_fence is understood, it + * should be locked by the caller. */ struct dma_fence * __i915_active_fence_set(struct i915_active_fence *active, @@ -1031,7 +1031,23 @@ __i915_active_fence_set(struct i915_active_fence *active, struct dma_fence *prev; unsigned long flags; - if (fence == rcu_access_pointer(active->fence)) + /* + * In case of fences embedded in i915_requests, their memory is + * SLAB_FAILSAFE_BY_RCU, then it can be reused right after release + * by new requests. Then, there is a risk of passing back a pointer + * to a new, completely unrelated fence that reuses the same memory + * while tracked under a different active tracker. Combined with i915 + * perf open/close operations that build await dependencies between + * engine kernel context requests and user requests from different + * timelines, this can lead to dependency loops and infinite waits. + * + * As a countermeasure, we try to get a reference to the active->fence + * first, so if we succeed and pass it back to our user then it is not + * released and potentially reused by an unrelated request before the + * user has a chance to set up an await dependency on it. + */ + prev = i915_active_fence_get(active); + if (fence == prev) return fence; GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)); @@ -1040,27 +1056,56 @@ __i915_active_fence_set(struct i915_active_fence *active, * Consider that we have two threads arriving (A and B), with * C already resident as the active->fence. * - * A does the xchg first, and so it sees C or NULL depending - * on the timing of the interrupt handler. If it is NULL, the - * previous fence must have been signaled and we know that - * we are first on the timeline. If it is still present, - * we acquire the lock on that fence and serialise with the interrupt - * handler, in the process removing it from any future interrupt - * callback. A will then wait on C before executing (if present). - * - * As B is second, it sees A as the previous fence and so waits for - * it to complete its transition and takes over the occupancy for - * itself -- remembering that it needs to wait on A before executing. + * Both A and B have got a reference to C or NULL, depending on the + * timing of the interrupt handler. Let's assume that if A has got C + * then it has locked C first (before B). * * Note the strong ordering of the timeline also provides consistent * nesting rules for the fence->lock; the inner lock is always the * older lock. */ spin_lock_irqsave(fence->lock, flags); - prev = xchg(__active_fence_slot(active), fence); - if (prev) { - GEM_BUG_ON(prev == fence); + if (prev) spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING); + + /* + * A does the cmpxchg first, and so it sees C or NULL, as before, or + * something else, depending on the timing of other threads and/or + * interrupt handler. If not the same as before then A unlocks C if + * applicable and retries, starting from an attempt to get a new + * active->fence. Meanwhile, B follows the same path as A. + * Once A succeeds with cmpxch, B fails again, retires, gets A from + * active->fence, locks it as soon as A completes, and possibly + * succeeds with cmpxchg. + */ + while (cmpxchg(__active_fence_slot(active), prev, fence) != prev) { + if (prev) { + spin_unlock(prev->lock); + dma_fence_put(prev); + } + spin_unlock_irqrestore(fence->lock, flags); + + prev = i915_active_fence_get(active); + GEM_BUG_ON(prev == fence); + + spin_lock_irqsave(fence->lock, flags); + if (prev) + spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING); + } + + /* + * If prev is NULL then the previous fence must have been signaled + * and we know that we are first on the timeline. If it is still + * present then, having the lock on that fence already acquired, we + * serialise with the interrupt handler, in the process of removing it + * from any future interrupt callback. A will then wait on C before + * executing (if present). + * + * As B is second, it sees A as the previous fence and so waits for + * it to complete its transition and takes over the occupancy for + * itself -- remembering that it needs to wait on A before executing. + */ + if (prev) { __list_del_entry(&active->cb.node); spin_unlock(prev->lock); /* serialise with prev->cb_list */ } @@ -1077,11 +1122,7 @@ int i915_active_fence_set(struct i915_active_fence *active, int err = 0; /* Must maintain timeline ordering wrt previous active requests */ - rcu_read_lock(); fence = __i915_active_fence_set(active, &rq->fence); - if (fence) /* but the previous fence may not belong to that timeline! */ - fence = dma_fence_get_rcu(fence); - rcu_read_unlock(); if (fence) { err = i915_request_await_dma_fence(rq, fence); dma_fence_put(fence); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 630a732aaecca..b620d3c3fe724 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1661,6 +1661,11 @@ __i915_request_ensure_parallel_ordering(struct i915_request *rq, request_to_parent(rq)->parallel.last_rq = i915_request_get(rq); + /* + * Users have to put a reference potentially got by + * __i915_active_fence_set() to the returned request + * when no longer needed + */ return to_request(__i915_active_fence_set(&timeline->last_request, &rq->fence)); } @@ -1707,6 +1712,10 @@ __i915_request_ensure_ordering(struct i915_request *rq, 0); } + /* + * Users have to put the reference to prev potentially got + * by __i915_active_fence_set() when no longer needed + */ return prev; } @@ -1760,6 +1769,8 @@ __i915_request_add_to_timeline(struct i915_request *rq) prev = __i915_request_ensure_ordering(rq, timeline); else prev = __i915_request_ensure_parallel_ordering(rq, timeline); + if (prev) + i915_request_put(prev); /* * Make sure that no request gazumped us - if it was allocated after diff --git a/drivers/gpu/drm/imx/ipuv3/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3/ipuv3-crtc.c index 5f26090b0c985..89585b31b985e 100644 --- a/drivers/gpu/drm/imx/ipuv3/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3/ipuv3-crtc.c @@ -310,7 +310,7 @@ static void ipu_crtc_mode_set_nofb(struct drm_crtc *crtc) dev_warn(ipu_crtc->dev, "8-pixel align hactive %d -> %d\n", sig_cfg.mode.hactive, new_hactive); - sig_cfg.mode.hfront_porch = new_hactive - sig_cfg.mode.hactive; + sig_cfg.mode.hfront_porch -= new_hactive - sig_cfg.mode.hactive; sig_cfg.mode.hactive = new_hactive; } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 7139a522b2f3b..54e3083076b78 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -519,7 +519,8 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo, if (bo->pin_count) { *locked = false; - *busy = false; + if (busy) + *busy = false; return false; } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 3fd83fb757227..bbad54aa6c8ca 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -894,6 +894,12 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, { int index; + if (cmds->num == CMDQ_BATCH_ENTRIES - 1 && + (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) { + arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true); + cmds->num = 0; + } + if (cmds->num == CMDQ_BATCH_ENTRIES) { arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false); cmds->num = 0; @@ -3429,6 +3435,44 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) return 0; } +#define IIDR_IMPLEMENTER_ARM 0x43b +#define IIDR_PRODUCTID_ARM_MMU_600 0x483 +#define IIDR_PRODUCTID_ARM_MMU_700 0x487 + +static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu) +{ + u32 reg; + unsigned int implementer, productid, variant, revision; + + reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR); + implementer = FIELD_GET(IIDR_IMPLEMENTER, reg); + productid = FIELD_GET(IIDR_PRODUCTID, reg); + variant = FIELD_GET(IIDR_VARIANT, reg); + revision = FIELD_GET(IIDR_REVISION, reg); + + switch (implementer) { + case IIDR_IMPLEMENTER_ARM: + switch (productid) { + case IIDR_PRODUCTID_ARM_MMU_600: + /* Arm erratum 1076982 */ + if (variant == 0 && revision <= 2) + smmu->features &= ~ARM_SMMU_FEAT_SEV; + /* Arm erratum 1209401 */ + if (variant < 2) + smmu->features &= ~ARM_SMMU_FEAT_NESTING; + break; + case IIDR_PRODUCTID_ARM_MMU_700: + /* Arm erratum 2812531 */ + smmu->features &= ~ARM_SMMU_FEAT_BTM; + smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC; + /* Arm errata 2268618, 2812531 */ + smmu->features &= ~ARM_SMMU_FEAT_NESTING; + break; + } + break; + } +} + static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) { u32 reg; @@ -3635,6 +3679,12 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) smmu->ias = max(smmu->ias, smmu->oas); + if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) && + (smmu->features & ARM_SMMU_FEAT_TRANS_S2)) + smmu->features |= ARM_SMMU_FEAT_NESTING; + + arm_smmu_device_iidr_probe(smmu); + if (arm_smmu_sva_supported(smmu)) smmu->features |= ARM_SMMU_FEAT_SVA; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index b574c58a34876..dcab85698a4e2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -69,6 +69,12 @@ #define IDR5_VAX GENMASK(11, 10) #define IDR5_VAX_52_BIT 1 +#define ARM_SMMU_IIDR 0x18 +#define IIDR_PRODUCTID GENMASK(31, 20) +#define IIDR_VARIANT GENMASK(19, 16) +#define IIDR_REVISION GENMASK(15, 12) +#define IIDR_IMPLEMENTER GENMASK(11, 0) + #define ARM_SMMU_CR0 0x20 #define CR0_ATSCHK (1 << 4) #define CR0_CMDQEN (1 << 3) @@ -639,11 +645,13 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_BTM (1 << 16) #define ARM_SMMU_FEAT_SVA (1 << 17) #define ARM_SMMU_FEAT_E2H (1 << 18) +#define ARM_SMMU_FEAT_NESTING (1 << 19) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) #define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1) #define ARM_SMMU_OPT_MSIPOLL (1 << 2) +#define ARM_SMMU_OPT_CMDQ_FORCE_SYNC (1 << 3) u32 options; struct arm_smmu_cmdq cmdq; diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index c0331b2680108..fe391de1aba32 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -839,7 +839,7 @@ hfcpci_fill_fifo(struct bchannel *bch) *z1t = cpu_to_le16(new_z1); /* now send data */ if (bch->tx_idx < bch->tx_skb->len) return; - dev_kfree_skb(bch->tx_skb); + dev_kfree_skb_any(bch->tx_skb); if (get_next_bframe(bch)) goto next_t_frame; return; @@ -895,7 +895,7 @@ hfcpci_fill_fifo(struct bchannel *bch) } bz->za[new_f1].z1 = cpu_to_le16(new_z1); /* for next buffer */ bz->f1 = new_f1; /* next frame */ - dev_kfree_skb(bch->tx_skb); + dev_kfree_skb_any(bch->tx_skb); get_next_bframe(bch); } @@ -1119,7 +1119,7 @@ tx_birq(struct bchannel *bch) if (bch->tx_skb && bch->tx_idx < bch->tx_skb->len) hfcpci_fill_fifo(bch); else { - dev_kfree_skb(bch->tx_skb); + dev_kfree_skb_any(bch->tx_skb); if (get_next_bframe(bch)) hfcpci_fill_fifo(bch); } @@ -2277,7 +2277,7 @@ _hfcpci_softirq(struct device *dev, void *unused) return 0; if (hc->hw.int_m2 & HFCPCI_IRQ_ENABLE) { - spin_lock(&hc->lock); + spin_lock_irq(&hc->lock); bch = Sel_BCS(hc, hc->hw.bswapped ? 2 : 1); if (bch && bch->state == ISDN_P_B_RAW) { /* B1 rx&tx */ main_rec_hfcpci(bch); @@ -2288,7 +2288,7 @@ _hfcpci_softirq(struct device *dev, void *unused) main_rec_hfcpci(bch); tx_birq(bch); } - spin_unlock(&hc->lock); + spin_unlock_irq(&hc->lock); } return 0; } diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c index 086426139173f..7366e85c09fd9 100644 --- a/drivers/mtd/nand/raw/fsl_upm.c +++ b/drivers/mtd/nand/raw/fsl_upm.c @@ -135,7 +135,7 @@ static int fun_exec_op(struct nand_chip *chip, const struct nand_operation *op, unsigned int i; int ret; - if (op->cs > NAND_MAX_CHIPS) + if (op->cs >= NAND_MAX_CHIPS) return -EINVAL; if (check_only) diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c index 4efb96e4e1c7a..b1325cf7abba6 100644 --- a/drivers/mtd/nand/raw/meson_nand.c +++ b/drivers/mtd/nand/raw/meson_nand.c @@ -1184,7 +1184,6 @@ static int meson_nand_attach_chip(struct nand_chip *nand) struct meson_nfc *nfc = nand_get_controller_data(nand); struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand); struct mtd_info *mtd = nand_to_mtd(nand); - int nsectors = mtd->writesize / 1024; int ret; if (!mtd->name) { @@ -1202,7 +1201,7 @@ static int meson_nand_attach_chip(struct nand_chip *nand) nand->options |= NAND_NO_SUBPAGE_WRITE; ret = nand_ecc_choose_conf(nand, nfc->data->ecc_caps, - mtd->oobsize - 2 * nsectors); + mtd->oobsize - 2); if (ret) { dev_err(nfc->dev, "failed to ECC init\n"); return -EINVAL; diff --git a/drivers/mtd/nand/raw/omap_elm.c b/drivers/mtd/nand/raw/omap_elm.c index 6e1eac6644a66..4a97d4a76454a 100644 --- a/drivers/mtd/nand/raw/omap_elm.c +++ b/drivers/mtd/nand/raw/omap_elm.c @@ -177,17 +177,17 @@ static void elm_load_syndrome(struct elm_info *info, switch (info->bch_type) { case BCH8_ECC: /* syndrome fragment 0 = ecc[9-12B] */ - val = cpu_to_be32(*(u32 *) &ecc[9]); + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[9]); elm_write_reg(info, offset, val); /* syndrome fragment 1 = ecc[5-8B] */ offset += 4; - val = cpu_to_be32(*(u32 *) &ecc[5]); + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[5]); elm_write_reg(info, offset, val); /* syndrome fragment 2 = ecc[1-4B] */ offset += 4; - val = cpu_to_be32(*(u32 *) &ecc[1]); + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[1]); elm_write_reg(info, offset, val); /* syndrome fragment 3 = ecc[0B] */ @@ -197,35 +197,35 @@ static void elm_load_syndrome(struct elm_info *info, break; case BCH4_ECC: /* syndrome fragment 0 = ecc[20-52b] bits */ - val = (cpu_to_be32(*(u32 *) &ecc[3]) >> 4) | + val = ((__force u32)cpu_to_be32(*(u32 *)&ecc[3]) >> 4) | ((ecc[2] & 0xf) << 28); elm_write_reg(info, offset, val); /* syndrome fragment 1 = ecc[0-20b] bits */ offset += 4; - val = cpu_to_be32(*(u32 *) &ecc[0]) >> 12; + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[0]) >> 12; elm_write_reg(info, offset, val); break; case BCH16_ECC: - val = cpu_to_be32(*(u32 *) &ecc[22]); + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[22]); elm_write_reg(info, offset, val); offset += 4; - val = cpu_to_be32(*(u32 *) &ecc[18]); + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[18]); elm_write_reg(info, offset, val); offset += 4; - val = cpu_to_be32(*(u32 *) &ecc[14]); + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[14]); elm_write_reg(info, offset, val); offset += 4; - val = cpu_to_be32(*(u32 *) &ecc[10]); + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[10]); elm_write_reg(info, offset, val); offset += 4; - val = cpu_to_be32(*(u32 *) &ecc[6]); + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[6]); elm_write_reg(info, offset, val); offset += 4; - val = cpu_to_be32(*(u32 *) &ecc[2]); + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[2]); elm_write_reg(info, offset, val); offset += 4; - val = cpu_to_be32(*(u32 *) &ecc[0]) >> 16; + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[0]) >> 16; elm_write_reg(info, offset, val); break; default: diff --git a/drivers/mtd/nand/raw/rockchip-nand-controller.c b/drivers/mtd/nand/raw/rockchip-nand-controller.c index 2312e27362cbe..5a04680342c32 100644 --- a/drivers/mtd/nand/raw/rockchip-nand-controller.c +++ b/drivers/mtd/nand/raw/rockchip-nand-controller.c @@ -562,9 +562,10 @@ static int rk_nfc_write_page_raw(struct nand_chip *chip, const u8 *buf, * BBM OOB1 OOB2 OOB3 |......| PA0 PA1 PA2 PA3 * * The rk_nfc_ooblayout_free() function already has reserved - * these 4 bytes with: + * these 4 bytes together with 2 bytes for BBM + * by reducing it's length: * - * oob_region->offset = NFC_SYS_DATA_SIZE + 2; + * oob_region->length = rknand->metadata_size - NFC_SYS_DATA_SIZE - 2; */ if (!i) memcpy(rk_nfc_oob_ptr(chip, i), @@ -597,7 +598,7 @@ static int rk_nfc_write_page_hwecc(struct nand_chip *chip, const u8 *buf, int pages_per_blk = mtd->erasesize / mtd->writesize; int ret = 0, i, boot_rom_mode = 0; dma_addr_t dma_data, dma_oob; - u32 reg; + u32 tmp; u8 *oob; nand_prog_page_begin_op(chip, page, 0, NULL, 0); @@ -624,6 +625,13 @@ static int rk_nfc_write_page_hwecc(struct nand_chip *chip, const u8 *buf, * * 0xFF 0xFF 0xFF 0xFF | BBM OOB1 OOB2 OOB3 | ... * + * The code here just swaps the first 4 bytes with the last + * 4 bytes without losing any data. + * + * The chip->oob_poi data layout: + * + * BBM OOB1 OOB2 OOB3 |......| PA0 PA1 PA2 PA3 + * * Configure the ECC algorithm supported by the boot ROM. */ if ((page < (pages_per_blk * rknand->boot_blks)) && @@ -634,21 +642,17 @@ static int rk_nfc_write_page_hwecc(struct nand_chip *chip, const u8 *buf, } for (i = 0; i < ecc->steps; i++) { - if (!i) { - reg = 0xFFFFFFFF; - } else { + if (!i) + oob = chip->oob_poi + (ecc->steps - 1) * NFC_SYS_DATA_SIZE; + else oob = chip->oob_poi + (i - 1) * NFC_SYS_DATA_SIZE; - reg = oob[0] | oob[1] << 8 | oob[2] << 16 | - oob[3] << 24; - } - if (!i && boot_rom_mode) - reg = (page & (pages_per_blk - 1)) * 4; + tmp = oob[0] | oob[1] << 8 | oob[2] << 16 | oob[3] << 24; if (nfc->cfg->type == NFC_V9) - nfc->oob_buf[i] = reg; + nfc->oob_buf[i] = tmp; else - nfc->oob_buf[i * (oob_step / 4)] = reg; + nfc->oob_buf[i * (oob_step / 4)] = tmp; } dma_data = dma_map_single(nfc->dev, (void *)nfc->page_buf, @@ -811,12 +815,17 @@ static int rk_nfc_read_page_hwecc(struct nand_chip *chip, u8 *buf, int oob_on, goto timeout_err; } - for (i = 1; i < ecc->steps; i++) { - oob = chip->oob_poi + (i - 1) * NFC_SYS_DATA_SIZE; + for (i = 0; i < ecc->steps; i++) { + if (!i) + oob = chip->oob_poi + (ecc->steps - 1) * NFC_SYS_DATA_SIZE; + else + oob = chip->oob_poi + (i - 1) * NFC_SYS_DATA_SIZE; + if (nfc->cfg->type == NFC_V9) tmp = nfc->oob_buf[i]; else tmp = nfc->oob_buf[i * (oob_step / 4)]; + *oob++ = (u8)tmp; *oob++ = (u8)(tmp >> 8); *oob++ = (u8)(tmp >> 16); @@ -933,12 +942,8 @@ static int rk_nfc_ooblayout_free(struct mtd_info *mtd, int section, if (section) return -ERANGE; - /* - * The beginning of the OOB area stores the reserved data for the NFC, - * the size of the reserved data is NFC_SYS_DATA_SIZE bytes. - */ oob_region->length = rknand->metadata_size - NFC_SYS_DATA_SIZE - 2; - oob_region->offset = NFC_SYS_DATA_SIZE + 2; + oob_region->offset = 2; return 0; } diff --git a/drivers/mtd/nand/spi/toshiba.c b/drivers/mtd/nand/spi/toshiba.c index 7380b1ebaccd5..a80427c131216 100644 --- a/drivers/mtd/nand/spi/toshiba.c +++ b/drivers/mtd/nand/spi/toshiba.c @@ -73,7 +73,7 @@ static int tx58cxgxsxraix_ecc_get_status(struct spinand_device *spinand, { struct nand_device *nand = spinand_to_nand(spinand); u8 mbf = 0; - struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, &mbf); + struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, spinand->scratchbuf); switch (status & STATUS_ECC_MASK) { case STATUS_ECC_NO_BITFLIPS: @@ -92,7 +92,7 @@ static int tx58cxgxsxraix_ecc_get_status(struct spinand_device *spinand, if (spi_mem_exec_op(spinand->spimem, &op)) return nanddev_get_ecc_conf(nand)->strength; - mbf >>= 4; + mbf = *(spinand->scratchbuf) >> 4; if (WARN_ON(mbf > nanddev_get_ecc_conf(nand)->strength || !mbf)) return nanddev_get_ecc_conf(nand)->strength; diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c index 3ad58cd284d8b..f507e37593012 100644 --- a/drivers/mtd/nand/spi/winbond.c +++ b/drivers/mtd/nand/spi/winbond.c @@ -108,7 +108,7 @@ static int w25n02kv_ecc_get_status(struct spinand_device *spinand, { struct nand_device *nand = spinand_to_nand(spinand); u8 mbf = 0; - struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, &mbf); + struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, spinand->scratchbuf); switch (status & STATUS_ECC_MASK) { case STATUS_ECC_NO_BITFLIPS: @@ -126,7 +126,7 @@ static int w25n02kv_ecc_get_status(struct spinand_device *spinand, if (spi_mem_exec_op(spinand->spimem, &op)) return nanddev_get_ecc_conf(nand)->strength; - mbf >>= 4; + mbf = *(spinand->scratchbuf) >> 4; if (WARN_ON(mbf > nanddev_get_ecc_conf(nand)->strength || !mbf)) return nanddev_get_ecc_conf(nand)->strength; diff --git a/drivers/mtd/spi-nor/spansion.c b/drivers/mtd/spi-nor/spansion.c index 36876aa849ede..15f9a80c10b9b 100644 --- a/drivers/mtd/spi-nor/spansion.c +++ b/drivers/mtd/spi-nor/spansion.c @@ -361,7 +361,7 @@ static int cypress_nor_determine_addr_mode_by_sr1(struct spi_nor *nor, */ static int cypress_nor_set_addr_mode_nbytes(struct spi_nor *nor) { - struct spi_mem_op op = {}; + struct spi_mem_op op; u8 addr_mode; int ret; @@ -492,7 +492,7 @@ s25fs256t_post_bfpt_fixup(struct spi_nor *nor, const struct sfdp_parameter_header *bfpt_header, const struct sfdp_bfpt *bfpt) { - struct spi_mem_op op = {}; + struct spi_mem_op op; int ret; ret = cypress_nor_set_addr_mode_nbytes(nor); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index cde253d27bd08..72374b066f64a 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1436,7 +1436,9 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) if (IS_ERR(priv->clk)) return PTR_ERR(priv->clk); - clk_prepare_enable(priv->clk); + ret = clk_prepare_enable(priv->clk); + if (ret) + return ret; priv->clk_mdiv = devm_clk_get_optional(&pdev->dev, "sw_switch_mdiv"); if (IS_ERR(priv->clk_mdiv)) { @@ -1444,7 +1446,9 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) goto out_clk; } - clk_prepare_enable(priv->clk_mdiv); + ret = clk_prepare_enable(priv->clk_mdiv); + if (ret) + goto out_clk; ret = bcm_sf2_sw_rst(priv); if (ret) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b499bc9c4e067..e481960cb6c7a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -633,12 +633,13 @@ tx_kick_pending: return NETDEV_TX_OK; } -static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) +static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) { struct bnxt_tx_ring_info *txr = bnapi->tx_ring; struct netdev_queue *txq = netdev_get_tx_queue(bp->dev, txr->txq_index); u16 cons = txr->tx_cons; struct pci_dev *pdev = bp->pdev; + int nr_pkts = bnapi->tx_pkts; int i; unsigned int tx_bytes = 0; @@ -688,6 +689,7 @@ next_tx_int: dev_kfree_skb_any(skb); } + bnapi->tx_pkts = 0; WRITE_ONCE(txr->tx_cons, cons); __netif_txq_completed_wake(txq, nr_pkts, tx_bytes, @@ -697,17 +699,24 @@ next_tx_int: static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, struct bnxt_rx_ring_info *rxr, + unsigned int *offset, gfp_t gfp) { struct device *dev = &bp->pdev->dev; struct page *page; - page = page_pool_dev_alloc_pages(rxr->page_pool); + if (PAGE_SIZE > BNXT_RX_PAGE_SIZE) { + page = page_pool_dev_alloc_frag(rxr->page_pool, offset, + BNXT_RX_PAGE_SIZE); + } else { + page = page_pool_dev_alloc_pages(rxr->page_pool); + *offset = 0; + } if (!page) return NULL; - *mapping = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, bp->rx_dir, - DMA_ATTR_WEAK_ORDERING); + *mapping = dma_map_page_attrs(dev, page, *offset, BNXT_RX_PAGE_SIZE, + bp->rx_dir, DMA_ATTR_WEAK_ORDERING); if (dma_mapping_error(dev, *mapping)) { page_pool_recycle_direct(rxr->page_pool, page); return NULL; @@ -747,15 +756,16 @@ int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, dma_addr_t mapping; if (BNXT_RX_PAGE_MODE(bp)) { + unsigned int offset; struct page *page = - __bnxt_alloc_rx_page(bp, &mapping, rxr, gfp); + __bnxt_alloc_rx_page(bp, &mapping, rxr, &offset, gfp); if (!page) return -ENOMEM; mapping += bp->rx_dma_offset; rx_buf->data = page; - rx_buf->data_ptr = page_address(page) + bp->rx_offset; + rx_buf->data_ptr = page_address(page) + offset + bp->rx_offset; } else { u8 *data = __bnxt_alloc_rx_frag(bp, &mapping, gfp); @@ -815,7 +825,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp, unsigned int offset = 0; if (BNXT_RX_PAGE_MODE(bp)) { - page = __bnxt_alloc_rx_page(bp, &mapping, rxr, gfp); + page = __bnxt_alloc_rx_page(bp, &mapping, rxr, &offset, gfp); if (!page) return -ENOMEM; @@ -962,15 +972,15 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp, return NULL; } dma_addr -= bp->rx_dma_offset; - dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir, - DMA_ATTR_WEAK_ORDERING); - skb = build_skb(page_address(page), PAGE_SIZE); + dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, BNXT_RX_PAGE_SIZE, + bp->rx_dir, DMA_ATTR_WEAK_ORDERING); + skb = build_skb(data_ptr - bp->rx_offset, BNXT_RX_PAGE_SIZE); if (!skb) { page_pool_recycle_direct(rxr->page_pool, page); return NULL; } skb_mark_for_recycle(skb); - skb_reserve(skb, bp->rx_dma_offset); + skb_reserve(skb, bp->rx_offset); __skb_put(skb, len); return skb; @@ -996,8 +1006,8 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, return NULL; } dma_addr -= bp->rx_dma_offset; - dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir, - DMA_ATTR_WEAK_ORDERING); + dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, BNXT_RX_PAGE_SIZE, + bp->rx_dir, DMA_ATTR_WEAK_ORDERING); if (unlikely(!payload)) payload = eth_get_headlen(bp->dev, data_ptr, len); @@ -1010,7 +1020,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, skb_mark_for_recycle(skb); off = (void *)data_ptr - page_address(page); - skb_add_rx_frag(skb, 0, page, off, len, PAGE_SIZE); + skb_add_rx_frag(skb, 0, page, off, len, BNXT_RX_PAGE_SIZE); memcpy(skb->data - NET_IP_ALIGN, data_ptr - NET_IP_ALIGN, payload + NET_IP_ALIGN); @@ -1145,7 +1155,7 @@ static struct sk_buff *bnxt_rx_agg_pages_skb(struct bnxt *bp, skb->data_len += total_frag_len; skb->len += total_frag_len; - skb->truesize += PAGE_SIZE * agg_bufs; + skb->truesize += BNXT_RX_PAGE_SIZE * agg_bufs; return skb; } @@ -2573,12 +2583,11 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, return rx_pkts; } -static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi) +static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi, + int budget) { - if (bnapi->tx_pkts) { - bnapi->tx_int(bp, bnapi, bnapi->tx_pkts); - bnapi->tx_pkts = 0; - } + if (bnapi->tx_pkts) + bnapi->tx_int(bp, bnapi, budget); if ((bnapi->events & BNXT_RX_EVENT) && !(bnapi->in_reset)) { struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; @@ -2607,7 +2616,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, */ bnxt_db_cq(bp, &cpr->cp_db, cpr->cp_raw_cons); - __bnxt_poll_work_done(bp, bnapi); + __bnxt_poll_work_done(bp, bnapi, budget); return rx_pkts; } @@ -2738,7 +2747,7 @@ static int __bnxt_poll_cqs(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) } static void __bnxt_poll_cqs_done(struct bnxt *bp, struct bnxt_napi *bnapi, - u64 dbr_type) + u64 dbr_type, int budget) { struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; int i; @@ -2754,7 +2763,7 @@ static void __bnxt_poll_cqs_done(struct bnxt *bp, struct bnxt_napi *bnapi, cpr2->had_work_done = 0; } } - __bnxt_poll_work_done(bp, bnapi); + __bnxt_poll_work_done(bp, bnapi, budget); } static int bnxt_poll_p5(struct napi_struct *napi, int budget) @@ -2784,7 +2793,8 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget) if (cpr->has_more_work) break; - __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ_ARMALL); + __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ_ARMALL, + budget); cpr->cp_raw_cons = raw_cons; if (napi_complete_done(napi, work_done)) BNXT_DB_NQ_ARM_P5(&cpr->cp_db, @@ -2814,7 +2824,7 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget) } raw_cons = NEXT_RAW_CMP(raw_cons); } - __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ); + __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ, budget); if (raw_cons != cpr->cp_raw_cons) { cpr->cp_raw_cons = raw_cons; BNXT_DB_NQ_P5(&cpr->cp_db, raw_cons); @@ -2947,8 +2957,8 @@ skip_rx_tpa_free: rx_buf->data = NULL; if (BNXT_RX_PAGE_MODE(bp)) { mapping -= bp->rx_dma_offset; - dma_unmap_page_attrs(&pdev->dev, mapping, PAGE_SIZE, - bp->rx_dir, + dma_unmap_page_attrs(&pdev->dev, mapping, + BNXT_RX_PAGE_SIZE, bp->rx_dir, DMA_ATTR_WEAK_ORDERING); page_pool_recycle_direct(rxr->page_pool, data); } else { @@ -3217,6 +3227,8 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp, pp.napi = &rxr->bnapi->napi; pp.dev = &bp->pdev->dev; pp.dma_dir = DMA_BIDIRECTIONAL; + if (PAGE_SIZE > BNXT_RX_PAGE_SIZE) + pp.flags |= PP_FLAG_PAGE_FRAG; rxr->page_pool = page_pool_create(&pp); if (IS_ERR(rxr->page_pool)) { @@ -3993,26 +4005,29 @@ void bnxt_set_ring_params(struct bnxt *bp) */ int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode) { + struct net_device *dev = bp->dev; + if (page_mode) { bp->flags &= ~BNXT_FLAG_AGG_RINGS; bp->flags |= BNXT_FLAG_RX_PAGE_MODE; - if (bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) { + if (bp->xdp_prog->aux->xdp_has_frags) + dev->max_mtu = min_t(u16, bp->max_mtu, BNXT_MAX_MTU); + else + dev->max_mtu = + min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU); + if (dev->mtu > BNXT_MAX_PAGE_MODE_MTU) { bp->flags |= BNXT_FLAG_JUMBO; bp->rx_skb_func = bnxt_rx_multi_page_skb; - bp->dev->max_mtu = - min_t(u16, bp->max_mtu, BNXT_MAX_MTU); } else { bp->flags |= BNXT_FLAG_NO_AGG_RINGS; bp->rx_skb_func = bnxt_rx_page_skb; - bp->dev->max_mtu = - min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU); } bp->rx_dir = DMA_BIDIRECTIONAL; /* Disable LRO or GRO_HW */ - netdev_update_features(bp->dev); + netdev_update_features(dev); } else { - bp->dev->max_mtu = bp->max_mtu; + dev->max_mtu = bp->max_mtu; bp->flags &= ~BNXT_FLAG_RX_PAGE_MODE; bp->rx_dir = DMA_FROM_DEVICE; bp->rx_skb_func = bnxt_rx_skb; @@ -9433,6 +9448,8 @@ static void bnxt_enable_napi(struct bnxt *bp) cpr->sw_stats.rx.rx_resets++; bnapi->in_reset = false; + bnapi->tx_pkts = 0; + if (bnapi->rx_ring) { INIT_WORK(&cpr->dim.work, bnxt_dim_work); cpr->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 080e73496066b..bb95c3dc5270f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1005,7 +1005,7 @@ struct bnxt_napi { struct bnxt_tx_ring_info *tx_ring; void (*tx_int)(struct bnxt *, struct bnxt_napi *, - int); + int budget); int tx_pkts; u8 events; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 4efa5fe6972b2..fb43232310b2d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -125,16 +125,20 @@ static void __bnxt_xmit_xdp_redirect(struct bnxt *bp, dma_unmap_len_set(tx_buf, len, 0); } -void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) +void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) { struct bnxt_tx_ring_info *txr = bnapi->tx_ring; struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; bool rx_doorbell_needed = false; + int nr_pkts = bnapi->tx_pkts; struct bnxt_sw_tx_bd *tx_buf; u16 tx_cons = txr->tx_cons; u16 last_tx_cons = tx_cons; int i, j, frags; + if (!budget) + return; + for (i = 0; i < nr_pkts; i++) { tx_buf = &txr->tx_buf_ring[tx_cons]; @@ -161,6 +165,8 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) } tx_cons = NEXT_TX(tx_cons); } + + bnapi->tx_pkts = 0; WRITE_ONCE(txr->tx_cons, tx_cons); if (rx_doorbell_needed) { tx_buf = &txr->tx_buf_ring[last_tx_cons]; @@ -180,8 +186,8 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, u8 *data_ptr, unsigned int len, struct xdp_buff *xdp) { + u32 buflen = BNXT_RX_PAGE_SIZE; struct bnxt_sw_rx_bd *rx_buf; - u32 buflen = PAGE_SIZE; struct pci_dev *pdev; dma_addr_t mapping; u32 offset; @@ -297,7 +303,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, rx_buf = &rxr->rx_buf_ring[cons]; mapping = rx_buf->mapping - bp->rx_dma_offset; dma_unmap_page_attrs(&pdev->dev, mapping, - PAGE_SIZE, bp->rx_dir, + BNXT_RX_PAGE_SIZE, bp->rx_dir, DMA_ATTR_WEAK_ORDERING); /* if we are unable to allocate a new buffer, abort and reuse */ @@ -480,7 +486,7 @@ bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags, } xdp_update_skb_shared_info(skb, num_frags, sinfo->xdp_frags_size, - PAGE_SIZE * sinfo->nr_frags, + BNXT_RX_PAGE_SIZE * sinfo->nr_frags, xdp_buff_is_frag_pfmemalloc(xdp)); return skb; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index ea430d6961df3..5e412c5655ba5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -16,7 +16,7 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, struct bnxt_tx_ring_info *txr, dma_addr_t mapping, u32 len, struct xdp_buff *xdp); -void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts); +void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget); bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, struct xdp_buff xdp, struct page *page, u8 **data_ptr, unsigned int *len, u8 *event); diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index a52cf9aae4988..5ef073a79ce94 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -57,6 +57,7 @@ #include #include +#include #include #include diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index fbe70458fda27..34e8e7cb1bc54 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -9055,6 +9055,7 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type, { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_pf *pf = np->vsi->back; + bool locked = false; int err; switch (type) { @@ -9064,10 +9065,27 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type, ice_setup_tc_block_cb, np, np, true); case TC_SETUP_QDISC_MQPRIO: + if (pf->adev) { + mutex_lock(&pf->adev_mutex); + device_lock(&pf->adev->dev); + locked = true; + if (pf->adev->dev.driver) { + netdev_err(netdev, "Cannot change qdisc when RDMA is active\n"); + err = -EBUSY; + goto adev_unlock; + } + } + /* setup traffic classifier for receive side */ mutex_lock(&pf->tc_mutex); err = ice_setup_tc_mqprio_qdisc(netdev, type_data); mutex_unlock(&pf->tc_mutex); + +adev_unlock: + if (locked) { + device_unlock(&pf->adev->dev); + mutex_unlock(&pf->adev_mutex); + } return err; default: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 2b9335cb4bb3a..8537578e1cf1d 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -1302,11 +1302,10 @@ static int korina_probe(struct platform_device *pdev) else if (of_get_ethdev_address(pdev->dev.of_node, dev) < 0) eth_hw_addr_random(dev); - clk = devm_clk_get_optional(&pdev->dev, "mdioclk"); + clk = devm_clk_get_optional_enabled(&pdev->dev, "mdioclk"); if (IS_ERR(clk)) return PTR_ERR(clk); if (clk) { - clk_prepare_enable(clk); lp->mii_clock_freq = clk_get_rate(clk); } else { lp->mii_clock_freq = 200000000; /* max possible input clk */ diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c index 035ead7935c74..dab61cc1acb57 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c @@ -98,6 +98,9 @@ int octep_ctrl_mbox_init(struct octep_ctrl_mbox *mbox) writeq(OCTEP_CTRL_MBOX_STATUS_INIT, OCTEP_CTRL_MBOX_INFO_HOST_STATUS(mbox->barmem)); + mutex_init(&mbox->h2fq_lock); + mutex_init(&mbox->f2hq_lock); + mbox->h2fq.sz = readl(OCTEP_CTRL_MBOX_H2FQ_SZ(mbox->barmem)); mbox->h2fq.hw_prod = OCTEP_CTRL_MBOX_H2FQ_PROD(mbox->barmem); mbox->h2fq.hw_cons = OCTEP_CTRL_MBOX_H2FQ_CONS(mbox->barmem); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c index f328d957b2db7..35857dc19542f 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c @@ -727,7 +727,8 @@ pick_fw_ver: err = request_firmware_direct(&fw->bin, fw_path, fw->dev.dev); if (err) { - if (ver_maj == PRESTERA_SUPP_FW_MAJ_VER) { + if (ver_maj != PRESTERA_PREV_FW_MAJ_VER || + ver_min != PRESTERA_PREV_FW_MIN_VER) { ver_maj = PRESTERA_PREV_FW_MAJ_VER; ver_min = PRESTERA_PREV_FW_MIN_VER; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index f0c3464f037f4..0c88cf47af01b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -1030,9 +1030,6 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, int out_index; int err = 0; - if (!mlx5e_is_eswitch_flow(flow)) - return 0; - parse_attr = attr->parse_attr; esw_attr = attr->esw_attr; *vf_tun = false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index d97e6df66f454..b8dd744536553 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -323,8 +323,11 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, net_prefetch(mxbuf->xdp.data); prog = rcu_dereference(rq->xdp_prog); - if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) + if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) { + if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) + wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); return NULL; /* page/packet was consumed by XDP */ + } /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse * will be handled by mlx5e_free_rx_wqe. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index dbe87bf89c0dd..832d36be4a17b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -808,9 +808,9 @@ static void setup_fte_upper_proto_match(struct mlx5_flow_spec *spec, struct upsp } if (upspec->sport) { - MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, udp_dport, + MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, udp_sport, upspec->sport_mask); - MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, udp_dport, upspec->sport); + MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, udp_sport, upspec->sport); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index eab5bc718771f..8d995e3048692 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -58,7 +58,9 @@ static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x) trailer_len = alen + plen + 2; - pskb_trim(skb, skb->len - trailer_len); + ret = pskb_trim(skb, skb->len - trailer_len); + if (unlikely(ret)) + return ret; if (skb->protocol == htons(ETH_P_IP)) { ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len); ip_send_check(ipv4hdr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c index cf704f106b7c2..984fa04bd331b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -188,7 +188,6 @@ static void mlx5e_tls_debugfs_init(struct mlx5e_tls *tls, int mlx5e_ktls_init(struct mlx5e_priv *priv) { - struct mlx5_crypto_dek_pool *dek_pool; struct mlx5e_tls *tls; if (!mlx5e_is_ktls_device(priv->mdev)) @@ -199,12 +198,6 @@ int mlx5e_ktls_init(struct mlx5e_priv *priv) return -ENOMEM; tls->mdev = priv->mdev; - dek_pool = mlx5_crypto_dek_pool_create(priv->mdev, MLX5_ACCEL_OBJ_TLS_KEY); - if (IS_ERR(dek_pool)) { - kfree(tls); - return PTR_ERR(dek_pool); - } - tls->dek_pool = dek_pool; priv->tls = tls; mlx5e_tls_debugfs_init(tls, priv->dfs_root); @@ -222,7 +215,6 @@ void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) debugfs_remove_recursive(tls->debugfs.dfs); tls->debugfs.dfs = NULL; - mlx5_crypto_dek_pool_destroy(tls->dek_pool); kfree(priv->tls); priv->tls = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 0e4c0a093293a..c49363dd6bf9a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -908,28 +908,51 @@ static void mlx5e_tls_tx_debugfs_init(struct mlx5e_tls *tls, int mlx5e_ktls_init_tx(struct mlx5e_priv *priv) { + struct mlx5_crypto_dek_pool *dek_pool; struct mlx5e_tls *tls = priv->tls; + int err; + + if (!mlx5e_is_ktls_device(priv->mdev)) + return 0; + + /* DEK pool could be used by either or both of TX and RX. But we have to + * put the creation here to avoid syndrome when doing devlink reload. + */ + dek_pool = mlx5_crypto_dek_pool_create(priv->mdev, MLX5_ACCEL_OBJ_TLS_KEY); + if (IS_ERR(dek_pool)) + return PTR_ERR(dek_pool); + tls->dek_pool = dek_pool; if (!mlx5e_is_ktls_tx(priv->mdev)) return 0; priv->tls->tx_pool = mlx5e_tls_tx_pool_init(priv->mdev, &priv->tls->sw_stats); - if (!priv->tls->tx_pool) - return -ENOMEM; + if (!priv->tls->tx_pool) { + err = -ENOMEM; + goto err_tx_pool_init; + } mlx5e_tls_tx_debugfs_init(tls, tls->debugfs.dfs); return 0; + +err_tx_pool_init: + mlx5_crypto_dek_pool_destroy(dek_pool); + return err; } void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv) { if (!mlx5e_is_ktls_tx(priv->mdev)) - return; + goto dek_pool_destroy; debugfs_remove_recursive(priv->tls->debugfs.dfs_tx); priv->tls->debugfs.dfs_tx = NULL; mlx5e_tls_tx_pool_cleanup(priv->tls->tx_pool); priv->tls->tx_pool = NULL; + +dek_pool_destroy: + if (mlx5e_is_ktls_device(priv->mdev)) + mlx5_crypto_dek_pool_destroy(priv->tls->dek_pool); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c index 7fc901a6ec5fc..414e285848813 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c @@ -161,6 +161,7 @@ static int macsec_fs_tx_create_crypto_table_groups(struct mlx5e_flow_table *ft) if (!in) { kfree(ft->g); + ft->g = NULL; return -ENOMEM; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index bed0c2d043e70..329d8c90facdd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -135,6 +135,16 @@ static void arfs_del_rules(struct mlx5e_flow_steering *fs); int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs) { + /* Moving to switchdev mode, fs->arfs is freed by mlx5e_nic_profile + * cleanup_rx callback and it is not recreated when + * mlx5e_uplink_rep_profile is loaded as mlx5e_create_flow_steering() + * is not called by the uplink_rep profile init_rx callback. Thus, if + * ntuple is set, moving to switchdev flow will enter this function + * with fs->arfs nullified. + */ + if (!mlx5e_fs_get_arfs(fs)) + return 0; + arfs_del_rules(fs); return arfs_disable(fs); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a5bdf78955d76..f084513fbead4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1036,7 +1036,23 @@ static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_s return err; } -static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) +static void mlx5e_flush_rq_cq(struct mlx5e_rq *rq) +{ + struct mlx5_cqwq *cqwq = &rq->cq.wq; + struct mlx5_cqe64 *cqe; + + if (test_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state)) { + while ((cqe = mlx5_cqwq_get_cqe_enahnced_comp(cqwq))) + mlx5_cqwq_pop(cqwq); + } else { + while ((cqe = mlx5_cqwq_get_cqe(cqwq))) + mlx5_cqwq_pop(cqwq); + } + + mlx5_cqwq_update_db_record(cqwq); +} + +int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state) { struct net_device *dev = rq->netdev; int err; @@ -1046,6 +1062,10 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn); return err; } + + mlx5e_free_rx_descs(rq); + mlx5e_flush_rq_cq(rq); + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err) { netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn); @@ -1055,13 +1075,6 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) return 0; } -int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state) -{ - mlx5e_free_rx_descs(rq); - - return mlx5e_rq_to_ready(rq, curr_state); -} - static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) { struct mlx5_core_dev *mdev = rq->mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 3e7041bd5705e..ad63d1f9a611f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -964,7 +964,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) err = mlx5e_open_drop_rq(priv, &priv->drop_rq); if (err) { mlx5_core_err(mdev, "open drop rq failed, %d\n", err); - return err; + goto err_rx_res_free; } err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0, @@ -998,6 +998,7 @@ err_destroy_rx_res: mlx5e_rx_res_destroy(priv->rx_res); err_close_drop_rq: mlx5e_close_drop_rq(&priv->drop_rq); +err_rx_res_free: mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; err_free_fs: @@ -1111,6 +1112,10 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) return err; } + err = mlx5e_rep_neigh_init(rpriv); + if (err) + goto err_neigh_init; + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { err = mlx5e_init_uplink_rep_tx(rpriv); if (err) @@ -1127,6 +1132,8 @@ err_ht_init: if (rpriv->rep->vport == MLX5_VPORT_UPLINK) mlx5e_cleanup_uplink_rep_tx(rpriv); err_init_tx: + mlx5e_rep_neigh_cleanup(rpriv); +err_neigh_init: mlx5e_destroy_tises(priv); return err; } @@ -1140,22 +1147,17 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) if (rpriv->rep->vport == MLX5_VPORT_UPLINK) mlx5e_cleanup_uplink_rep_tx(rpriv); + mlx5e_rep_neigh_cleanup(rpriv); mlx5e_destroy_tises(priv); } static void mlx5e_rep_enable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; - mlx5e_set_netdev_mtu_boundaries(priv); - mlx5e_rep_neigh_init(rpriv); } static void mlx5e_rep_disable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; - - mlx5e_rep_neigh_cleanup(rpriv); } static int mlx5e_update_rep_rx(struct mlx5e_priv *priv) @@ -1205,7 +1207,6 @@ static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; u16 max_mtu; @@ -1227,7 +1228,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) mlx5_notifier_register(mdev, &priv->events_nb); mlx5e_dcbnl_initialize(priv); mlx5e_dcbnl_init_app(priv); - mlx5e_rep_neigh_init(rpriv); mlx5e_rep_bridge_init(priv); netdev->wanted_features |= NETIF_F_HW_TC; @@ -1242,7 +1242,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_core_dev *mdev = priv->mdev; rtnl_lock(); @@ -1252,7 +1251,6 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) rtnl_unlock(); mlx5e_rep_bridge_cleanup(priv); - mlx5e_rep_neigh_cleanup(rpriv); mlx5e_dcbnl_delete_app(priv); mlx5_notifier_unregister(mdev, &priv->events_nb); mlx5e_rep_tc_disable(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index ed05ac8ae1de5..e002f013fa015 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1725,6 +1725,19 @@ verify_attr_actions(u32 actions, struct netlink_ext_ack *extack) return 0; } +static bool +has_encap_dests(struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int out_index; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) + if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) + return true; + + return false; +} + static int post_process_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr, @@ -1737,9 +1750,11 @@ post_process_attr(struct mlx5e_tc_flow *flow, if (err) goto err_out; - err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun); - if (err) - goto err_out; + if (mlx5e_is_eswitch_flow(flow) && has_encap_dests(attr)) { + err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun); + if (err) + goto err_out; + } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 8d19c20d3447e..c1f419b36289c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1376,7 +1376,6 @@ esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb) esw_init_chains_offload_flags(esw, &attr.flags); attr.ns = MLX5_FLOW_NAMESPACE_FDB; - attr.fs_base_prio = FDB_TC_OFFLOAD; attr.max_grp_num = esw->params.large_group_num; attr.default_ft = miss_fdb; attr.mapping = esw->offloads.reg_c0_obj_pool; @@ -4073,7 +4072,7 @@ int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable, } hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); - MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, 1); + MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, enable); err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport->vport, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 19da02c416161..5f87c446d3d97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -889,7 +889,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, struct fs_node *iter = list_entry(start, struct fs_node, list); struct mlx5_flow_table *ft = NULL; - if (!root || root->type == FS_TYPE_PRIO_CHAINS) + if (!root) return NULL; list_for_each_advance_continue(iter, &root->children, reverse) { @@ -905,20 +905,42 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, return ft; } -/* If reverse is false then return the first flow table in next priority of - * prio in the tree, else return the last flow table in the previous priority - * of prio in the tree. +static struct fs_node *find_prio_chains_parent(struct fs_node *parent, + struct fs_node **child) +{ + struct fs_node *node = NULL; + + while (parent && parent->type != FS_TYPE_PRIO_CHAINS) { + node = parent; + parent = parent->parent; + } + + if (child) + *child = node; + + return parent; +} + +/* If reverse is false then return the first flow table next to the passed node + * in the tree, else return the last flow table before the node in the tree. + * If skip is true, skip the flow tables in the same prio_chains prio. */ -static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse) +static struct mlx5_flow_table *find_closest_ft(struct fs_node *node, bool reverse, + bool skip) { + struct fs_node *prio_chains_parent = NULL; struct mlx5_flow_table *ft = NULL; struct fs_node *curr_node; struct fs_node *parent; - parent = prio->node.parent; - curr_node = &prio->node; + if (skip) + prio_chains_parent = find_prio_chains_parent(node, NULL); + parent = node->parent; + curr_node = node; while (!ft && parent) { - ft = find_closest_ft_recursive(parent, &curr_node->list, reverse); + if (parent != prio_chains_parent) + ft = find_closest_ft_recursive(parent, &curr_node->list, + reverse); curr_node = parent; parent = curr_node->parent; } @@ -926,15 +948,15 @@ static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool revers } /* Assuming all the tree is locked by mutex chain lock */ -static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio) +static struct mlx5_flow_table *find_next_chained_ft(struct fs_node *node) { - return find_closest_ft(prio, false); + return find_closest_ft(node, false, true); } /* Assuming all the tree is locked by mutex chain lock */ -static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio) +static struct mlx5_flow_table *find_prev_chained_ft(struct fs_node *node) { - return find_closest_ft(prio, true); + return find_closest_ft(node, true, true); } static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft, @@ -946,7 +968,7 @@ static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft, next_ns = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS; fs_get_obj(prio, next_ns ? ft->ns->node.parent : ft->node.parent); - return find_next_chained_ft(prio); + return find_next_chained_ft(&prio->node); } static int connect_fts_in_prio(struct mlx5_core_dev *dev, @@ -970,21 +992,55 @@ static int connect_fts_in_prio(struct mlx5_core_dev *dev, return 0; } +static struct mlx5_flow_table *find_closet_ft_prio_chains(struct fs_node *node, + struct fs_node *parent, + struct fs_node **child, + bool reverse) +{ + struct mlx5_flow_table *ft; + + ft = find_closest_ft(node, reverse, false); + + if (ft && parent == find_prio_chains_parent(&ft->node, child)) + return ft; + + return NULL; +} + /* Connect flow tables from previous priority of prio to ft */ static int connect_prev_fts(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, struct fs_prio *prio) { + struct fs_node *prio_parent, *parent = NULL, *child, *node; struct mlx5_flow_table *prev_ft; + int err = 0; + + prio_parent = find_prio_chains_parent(&prio->node, &child); + + /* return directly if not under the first sub ns of prio_chains prio */ + if (prio_parent && !list_is_first(&child->list, &prio_parent->children)) + return 0; - prev_ft = find_prev_chained_ft(prio); - if (prev_ft) { + prev_ft = find_prev_chained_ft(&prio->node); + while (prev_ft) { struct fs_prio *prev_prio; fs_get_obj(prev_prio, prev_ft->node.parent); - return connect_fts_in_prio(dev, prev_prio, ft); + err = connect_fts_in_prio(dev, prev_prio, ft); + if (err) + break; + + if (!parent) { + parent = find_prio_chains_parent(&prev_prio->node, &child); + if (!parent) + break; + } + + node = child; + prev_ft = find_closet_ft_prio_chains(node, parent, &child, true); } - return 0; + return err; } static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio @@ -1123,7 +1179,7 @@ static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table if (err) return err; - next_ft = first_ft ? first_ft : find_next_chained_ft(prio); + next_ft = first_ft ? first_ft : find_next_chained_ft(&prio->node); err = connect_fwd_rules(dev, ft, next_ft); if (err) return err; @@ -1198,7 +1254,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table); next_ft = unmanaged ? ft_attr->next_ft : - find_next_chained_ft(fs_prio); + find_next_chained_ft(&fs_prio->node); ft->def_miss_action = ns->def_miss_action; ft->ns = ns; err = root->cmds->create_flow_table(root, ft, ft_attr, next_ft); @@ -2195,13 +2251,20 @@ EXPORT_SYMBOL(mlx5_del_flow_rules); /* Assuming prio->node.children(flow tables) is sorted by level */ static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft) { + struct fs_node *prio_parent, *child; struct fs_prio *prio; fs_get_obj(prio, ft->node.parent); if (!list_is_last(&ft->node.list, &prio->node.children)) return list_next_entry(ft, node.list); - return find_next_chained_ft(prio); + + prio_parent = find_prio_chains_parent(&prio->node, &child); + + if (prio_parent && list_is_first(&child->list, &prio_parent->children)) + return find_closest_ft(&prio->node, false, false); + + return find_next_chained_ft(&prio->node); } static int update_root_ft_destroy(struct mlx5_flow_table *ft) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index db9df9798ffac..a80ecb672f33d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -178,7 +178,7 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains, if (!mlx5_chains_ignore_flow_level_supported(chains) || (chain == 0 && prio == 1 && level == 0)) { ft_attr.level = chains->fs_base_level; - ft_attr.prio = chains->fs_base_prio; + ft_attr.prio = chains->fs_base_prio + prio - 1; ns = (chains->ns == MLX5_FLOW_NAMESPACE_FDB) ? mlx5_get_fdb_sub_ns(chains->dev, chain) : mlx5_get_flow_namespace(chains->dev, chains->ns); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d6ee016deae17..c7a06c8bbb7a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1456,6 +1456,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev) if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { mlx5_core_warn(dev, "%s: interface is down, NOP\n", __func__); + mlx5_devlink_params_unregister(priv_to_devlink(dev)); mlx5_cleanup_once(dev); goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 1aa525e509f10..293d2edd03d59 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -562,11 +562,12 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev, err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); if (err) - return err; + goto err_free_in; *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id); - kvfree(in); +err_free_in: + kvfree(in); return err; } diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index c5687d94ea885..7b7e1c5b00f47 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h index f8682356d0cf4..94d4f9413ab7a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h @@ -193,6 +193,22 @@ void qed_hw_remove(struct qed_dev *cdev); */ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn); +/** + * qed_ptt_acquire_context(): Allocate a PTT window honoring the context + * atomicy. + * + * @p_hwfn: HW device data. + * @is_atomic: Hint from the caller - if the func can sleep or not. + * + * Context: The function should not sleep in case is_atomic == true. + * Return: struct qed_ptt. + * + * Should be called at the entry point to the driver + * (at the beginning of an exported function). + */ +struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn, + bool is_atomic); + /** * qed_ptt_release(): Release PTT Window. * diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c index 3764190b948eb..04602ac947087 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c +++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c @@ -693,13 +693,14 @@ static void _qed_fcoe_get_pstats(struct qed_hwfn *p_hwfn, } static int qed_fcoe_get_stats(struct qed_hwfn *p_hwfn, - struct qed_fcoe_stats *p_stats) + struct qed_fcoe_stats *p_stats, + bool is_atomic) { struct qed_ptt *p_ptt; memset(p_stats, 0, sizeof(*p_stats)); - p_ptt = qed_ptt_acquire(p_hwfn); + p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic); if (!p_ptt) { DP_ERR(p_hwfn, "Failed to acquire ptt\n"); @@ -973,19 +974,27 @@ static int qed_fcoe_destroy_conn(struct qed_dev *cdev, QED_SPQ_MODE_EBLOCK, NULL); } +static int qed_fcoe_stats_context(struct qed_dev *cdev, + struct qed_fcoe_stats *stats, + bool is_atomic) +{ + return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats, is_atomic); +} + static int qed_fcoe_stats(struct qed_dev *cdev, struct qed_fcoe_stats *stats) { - return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats); + return qed_fcoe_stats_context(cdev, stats, false); } void qed_get_protocol_stats_fcoe(struct qed_dev *cdev, - struct qed_mcp_fcoe_stats *stats) + struct qed_mcp_fcoe_stats *stats, + bool is_atomic) { struct qed_fcoe_stats proto_stats; /* Retrieve FW statistics */ memset(&proto_stats, 0, sizeof(proto_stats)); - if (qed_fcoe_stats(cdev, &proto_stats)) { + if (qed_fcoe_stats_context(cdev, &proto_stats, is_atomic)) { DP_VERBOSE(cdev, QED_MSG_STORAGE, "Failed to collect FCoE statistics\n"); return; diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h index 19c85adf4ceb1..214e8299ecb4e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h +++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h @@ -28,8 +28,20 @@ int qed_fcoe_alloc(struct qed_hwfn *p_hwfn); void qed_fcoe_setup(struct qed_hwfn *p_hwfn); void qed_fcoe_free(struct qed_hwfn *p_hwfn); +/** + * qed_get_protocol_stats_fcoe(): Fills provided statistics + * struct with statistics. + * + * @cdev: Qed dev pointer. + * @stats: Points to struct that will be filled with statistics. + * @is_atomic: Hint from the caller - if the func can sleep or not. + * + * Context: The function should not sleep in case is_atomic == true. + * Return: Void. + */ void qed_get_protocol_stats_fcoe(struct qed_dev *cdev, - struct qed_mcp_fcoe_stats *stats); + struct qed_mcp_fcoe_stats *stats, + bool is_atomic); #else /* CONFIG_QED_FCOE */ static inline int qed_fcoe_alloc(struct qed_hwfn *p_hwfn) { @@ -40,7 +52,8 @@ static inline void qed_fcoe_setup(struct qed_hwfn *p_hwfn) {} static inline void qed_fcoe_free(struct qed_hwfn *p_hwfn) {} static inline void qed_get_protocol_stats_fcoe(struct qed_dev *cdev, - struct qed_mcp_fcoe_stats *stats) + struct qed_mcp_fcoe_stats *stats, + bool is_atomic) { } #endif /* CONFIG_QED_FCOE */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c index 554f30b0cfd5e..6263f847b6b92 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hw.c +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c @@ -23,7 +23,10 @@ #include "qed_reg_addr.h" #include "qed_sriov.h" -#define QED_BAR_ACQUIRE_TIMEOUT 1000 +#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT 1000 +#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP 1000 +#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT 100000 +#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY 10 /* Invalid values */ #define QED_BAR_INVALID_OFFSET (cpu_to_le32(-1)) @@ -84,12 +87,22 @@ void qed_ptt_pool_free(struct qed_hwfn *p_hwfn) } struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn) +{ + return qed_ptt_acquire_context(p_hwfn, false); +} + +struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn, bool is_atomic) { struct qed_ptt *p_ptt; - unsigned int i; + unsigned int i, count; + + if (is_atomic) + count = QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT; + else + count = QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT; /* Take the free PTT from the list */ - for (i = 0; i < QED_BAR_ACQUIRE_TIMEOUT; i++) { + for (i = 0; i < count; i++) { spin_lock_bh(&p_hwfn->p_ptt_pool->lock); if (!list_empty(&p_hwfn->p_ptt_pool->free_list)) { @@ -105,7 +118,12 @@ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn) } spin_unlock_bh(&p_hwfn->p_ptt_pool->lock); - usleep_range(1000, 2000); + + if (is_atomic) + udelay(QED_BAR_ACQUIRE_TIMEOUT_UDELAY); + else + usleep_range(QED_BAR_ACQUIRE_TIMEOUT_USLEEP, + QED_BAR_ACQUIRE_TIMEOUT_USLEEP * 2); } DP_NOTICE(p_hwfn, "PTT acquire timeout - failed to allocate PTT\n"); diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c index 511ab214eb9c8..980e7289b4814 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c @@ -999,13 +999,14 @@ static void _qed_iscsi_get_pstats(struct qed_hwfn *p_hwfn, } static int qed_iscsi_get_stats(struct qed_hwfn *p_hwfn, - struct qed_iscsi_stats *stats) + struct qed_iscsi_stats *stats, + bool is_atomic) { struct qed_ptt *p_ptt; memset(stats, 0, sizeof(*stats)); - p_ptt = qed_ptt_acquire(p_hwfn); + p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic); if (!p_ptt) { DP_ERR(p_hwfn, "Failed to acquire ptt\n"); return -EAGAIN; @@ -1336,9 +1337,16 @@ static int qed_iscsi_destroy_conn(struct qed_dev *cdev, QED_SPQ_MODE_EBLOCK, NULL); } +static int qed_iscsi_stats_context(struct qed_dev *cdev, + struct qed_iscsi_stats *stats, + bool is_atomic) +{ + return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats, is_atomic); +} + static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats) { - return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats); + return qed_iscsi_stats_context(cdev, stats, false); } static int qed_iscsi_change_mac(struct qed_dev *cdev, @@ -1358,13 +1366,14 @@ static int qed_iscsi_change_mac(struct qed_dev *cdev, } void qed_get_protocol_stats_iscsi(struct qed_dev *cdev, - struct qed_mcp_iscsi_stats *stats) + struct qed_mcp_iscsi_stats *stats, + bool is_atomic) { struct qed_iscsi_stats proto_stats; /* Retrieve FW statistics */ memset(&proto_stats, 0, sizeof(proto_stats)); - if (qed_iscsi_stats(cdev, &proto_stats)) { + if (qed_iscsi_stats_context(cdev, &proto_stats, is_atomic)) { DP_VERBOSE(cdev, QED_MSG_STORAGE, "Failed to collect ISCSI statistics\n"); return; diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h index dec2b00259d42..974cb8d26608c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h @@ -39,11 +39,14 @@ void qed_iscsi_free(struct qed_hwfn *p_hwfn); * * @cdev: Qed dev pointer. * @stats: Points to struct that will be filled with statistics. + * @is_atomic: Hint from the caller - if the func can sleep or not. * + * Context: The function should not sleep in case is_atomic == true. * Return: Void. */ void qed_get_protocol_stats_iscsi(struct qed_dev *cdev, - struct qed_mcp_iscsi_stats *stats); + struct qed_mcp_iscsi_stats *stats, + bool is_atomic); #else /* IS_ENABLED(CONFIG_QED_ISCSI) */ static inline int qed_iscsi_alloc(struct qed_hwfn *p_hwfn) { @@ -56,7 +59,8 @@ static inline void qed_iscsi_free(struct qed_hwfn *p_hwfn) {} static inline void qed_get_protocol_stats_iscsi(struct qed_dev *cdev, - struct qed_mcp_iscsi_stats *stats) {} + struct qed_mcp_iscsi_stats *stats, + bool is_atomic) {} #endif /* IS_ENABLED(CONFIG_QED_ISCSI) */ #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 7776d3bdd459a..970b9aabbc3d7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1863,7 +1863,8 @@ static void __qed_get_vport_stats(struct qed_hwfn *p_hwfn, } static void _qed_get_vport_stats(struct qed_dev *cdev, - struct qed_eth_stats *stats) + struct qed_eth_stats *stats, + bool is_atomic) { u8 fw_vport = 0; int i; @@ -1872,10 +1873,11 @@ static void _qed_get_vport_stats(struct qed_dev *cdev, for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; - struct qed_ptt *p_ptt = IS_PF(cdev) ? qed_ptt_acquire(p_hwfn) - : NULL; + struct qed_ptt *p_ptt; bool b_get_port_stats; + p_ptt = IS_PF(cdev) ? qed_ptt_acquire_context(p_hwfn, is_atomic) + : NULL; if (IS_PF(cdev)) { /* The main vport index is relative first */ if (qed_fw_vport(p_hwfn, 0, &fw_vport)) { @@ -1900,6 +1902,13 @@ out: } void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats) +{ + qed_get_vport_stats_context(cdev, stats, false); +} + +void qed_get_vport_stats_context(struct qed_dev *cdev, + struct qed_eth_stats *stats, + bool is_atomic) { u32 i; @@ -1908,7 +1917,7 @@ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats) return; } - _qed_get_vport_stats(cdev, stats); + _qed_get_vport_stats(cdev, stats, is_atomic); if (!cdev->reset_stats) return; @@ -1960,7 +1969,7 @@ void qed_reset_vport_stats(struct qed_dev *cdev) if (!cdev->reset_stats) { DP_INFO(cdev, "Reset stats not allocated\n"); } else { - _qed_get_vport_stats(cdev, cdev->reset_stats); + _qed_get_vport_stats(cdev, cdev->reset_stats, false); cdev->reset_stats->common.link_change_count = 0; } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h index a538cf478c14e..2d2f82c785ad2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h @@ -249,8 +249,32 @@ qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn, enum spq_mode comp_mode, struct qed_spq_comp_cb *p_comp_data); +/** + * qed_get_vport_stats(): Fills provided statistics + * struct with statistics. + * + * @cdev: Qed dev pointer. + * @stats: Points to struct that will be filled with statistics. + * + * Return: Void. + */ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats); +/** + * qed_get_vport_stats_context(): Fills provided statistics + * struct with statistics. + * + * @cdev: Qed dev pointer. + * @stats: Points to struct that will be filled with statistics. + * @is_atomic: Hint from the caller - if the func can sleep or not. + * + * Context: The function should not sleep in case is_atomic == true. + * Return: Void. + */ +void qed_get_vport_stats_context(struct qed_dev *cdev, + struct qed_eth_stats *stats, + bool is_atomic); + void qed_reset_vport_stats(struct qed_dev *cdev); /** diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index f5af83342856f..c278f8893042b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -3092,7 +3092,7 @@ void qed_get_protocol_stats(struct qed_dev *cdev, switch (type) { case QED_MCP_LAN_STATS: - qed_get_vport_stats(cdev, ð_stats); + qed_get_vport_stats_context(cdev, ð_stats, true); stats->lan_stats.ucast_rx_pkts = eth_stats.common.rx_ucast_pkts; stats->lan_stats.ucast_tx_pkts = @@ -3100,10 +3100,10 @@ void qed_get_protocol_stats(struct qed_dev *cdev, stats->lan_stats.fcs_err = -1; break; case QED_MCP_FCOE_STATS: - qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats); + qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats, true); break; case QED_MCP_ISCSI_STATS: - qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats); + qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats, true); break; default: DP_VERBOSE(cdev, QED_MSG_SP, diff --git a/drivers/net/ethernet/sfc/siena/tx_common.c b/drivers/net/ethernet/sfc/siena/tx_common.c index 93a32d61944f0..a7a9ab304e136 100644 --- a/drivers/net/ethernet/sfc/siena/tx_common.c +++ b/drivers/net/ethernet/sfc/siena/tx_common.c @@ -12,6 +12,7 @@ #include "efx.h" #include "nic_common.h" #include "tx_common.h" +#include static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue) { diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c index 755aa92bf8236..9f2393d343715 100644 --- a/drivers/net/ethernet/sfc/tx_common.c +++ b/drivers/net/ethernet/sfc/tx_common.c @@ -12,6 +12,7 @@ #include "efx.h" #include "nic_common.h" #include "tx_common.h" +#include static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue) { diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 2d7347b71c41b..0dcd6a568b061 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -1851,6 +1851,17 @@ static int netsec_of_probe(struct platform_device *pdev, return err; } + /* + * SynQuacer is physically configured with TX and RX delays + * but the standard firmware claimed otherwise for a long + * time, ignore it. + */ + if (of_machine_is_compatible("socionext,developer-box") && + priv->phy_interface != PHY_INTERFACE_MODE_RGMII_ID) { + dev_warn(&pdev->dev, "Outdated firmware reports incorrect PHY mode, overriding\n"); + priv->phy_interface = PHY_INTERFACE_MODE_RGMII_ID; + } + priv->phy_np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); if (!priv->phy_np) { dev_err(&pdev->dev, "missing required property 'phy-handle'\n"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c index bdf990cf2f310..0880048ccdddc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c @@ -234,7 +234,8 @@ static int tegra_mgbe_probe(struct platform_device *pdev) res.addr = mgbe->regs; res.irq = irq; - mgbe->clks = devm_kzalloc(&pdev->dev, sizeof(*mgbe->clks), GFP_KERNEL); + mgbe->clks = devm_kcalloc(&pdev->dev, ARRAY_SIZE(mgbe_clks), + sizeof(*mgbe->clks), GFP_KERNEL); if (!mgbe->clks) return -ENOMEM; diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index a6211b95ed178..3525d5c0d694c 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -25,6 +25,7 @@ #endif #include +#include #include #include diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index e0ac1bcd9925c..49f303353ecb0 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1567,12 +1567,16 @@ static int temac_probe(struct platform_device *pdev) } /* Error handle returned DMA RX and TX interrupts */ - if (lp->rx_irq < 0) - return dev_err_probe(&pdev->dev, lp->rx_irq, + if (lp->rx_irq <= 0) { + rc = lp->rx_irq ?: -EINVAL; + return dev_err_probe(&pdev->dev, rc, "could not get DMA RX irq\n"); - if (lp->tx_irq < 0) - return dev_err_probe(&pdev->dev, lp->tx_irq, + } + if (lp->tx_irq <= 0) { + rc = lp->tx_irq ?: -EINVAL; + return dev_err_probe(&pdev->dev, rc, "could not get DMA TX irq\n"); + } if (temac_np) { /* Retrieve the MAC address */ diff --git a/drivers/net/tap.c b/drivers/net/tap.c index d30d730ed5a71..49d1d6acf95eb 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -533,7 +534,7 @@ static int tap_open(struct inode *inode, struct file *file) q->sock.state = SS_CONNECTED; q->sock.file = file; q->sock.ops = &tap_socket_ops; - sock_init_data_uid(&q->sock, &q->sk, inode->i_uid); + sock_init_data_uid(&q->sock, &q->sk, current_fsuid()); q->sk.sk_write_space = tap_sock_write_space; q->sk.sk_destruct = tap_sock_destruct; q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index d75456adc62ac..25f0191df00bf 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3469,7 +3469,7 @@ static int tun_chr_open(struct inode *inode, struct file * file) tfile->socket.file = file; tfile->socket.ops = &tun_socket_ops; - sock_init_data_uid(&tfile->socket, &tfile->sk, inode->i_uid); + sock_init_data_uid(&tfile->socket, &tfile->sk, current_fsuid()); tfile->sk.sk_write_space = tun_sock_write_space; tfile->sk.sk_sndbuf = INT_MAX; diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 80849d115e5dd..c1a75ef4fd68c 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -618,9 +618,23 @@ static const struct usb_device_id products[] = { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, + .idProduct = 0x8005, /* A-300 */ + ZAURUS_FAKE_INTERFACE, + .driver_info = 0, +}, { + .match_flags = USB_DEVICE_ID_MATCH_INT_INFO + | USB_DEVICE_ID_MATCH_DEVICE, + .idVendor = 0x04DD, .idProduct = 0x8006, /* B-500/SL-5600 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, +}, { + .match_flags = USB_DEVICE_ID_MATCH_INT_INFO + | USB_DEVICE_ID_MATCH_DEVICE, + .idVendor = 0x04DD, + .idProduct = 0x8006, /* B-500/SL-5600 */ + ZAURUS_FAKE_INTERFACE, + .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, @@ -628,6 +642,13 @@ static const struct usb_device_id products[] = { .idProduct = 0x8007, /* C-700 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, +}, { + .match_flags = USB_DEVICE_ID_MATCH_INT_INFO + | USB_DEVICE_ID_MATCH_DEVICE, + .idVendor = 0x04DD, + .idProduct = 0x8007, /* C-700 */ + ZAURUS_FAKE_INTERFACE, + .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index c458c030fadf6..59cde06aa7f60 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -4224,8 +4224,6 @@ static void lan78xx_disconnect(struct usb_interface *intf) if (!dev) return; - set_bit(EVENT_DEV_DISCONNECT, &dev->flags); - netif_napi_del(&dev->napi); udev = interface_to_usbdev(intf); @@ -4233,6 +4231,8 @@ static void lan78xx_disconnect(struct usb_interface *intf) unregister_netdev(net); + timer_shutdown_sync(&dev->stat_monitor); + set_bit(EVENT_DEV_DISCONNECT, &dev->flags); cancel_delayed_work_sync(&dev->wq); phydev = net->phydev; @@ -4247,9 +4247,6 @@ static void lan78xx_disconnect(struct usb_interface *intf) usb_scuttle_anchored_urbs(&dev->deferred); - if (timer_pending(&dev->stat_monitor)) - del_timer_sync(&dev->stat_monitor); - lan78xx_unbind(dev, intf); lan78xx_free_tx_resources(dev); diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0999a58ca9d26..0738baa5b82e4 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -27,6 +27,7 @@ #include #include #include +#include /* Information for net-next */ #define NETNEXT_VERSION "12" diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 283ffddda821d..2d14b0d78541a 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1775,6 +1775,10 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) } else if (!info->in || !info->out) status = usbnet_get_endpoints (dev, udev); else { + u8 ep_addrs[3] = { + info->in + USB_DIR_IN, info->out + USB_DIR_OUT, 0 + }; + dev->in = usb_rcvbulkpipe (xdev, info->in); dev->out = usb_sndbulkpipe (xdev, info->out); if (!(info->flags & FLAG_NO_SETINT)) @@ -1784,6 +1788,8 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) else status = 0; + if (status == 0 && !usb_check_bulk_endpoints(udev, ep_addrs)) + status = -EINVAL; } if (status >= 0 && dev->status) status = init_status (dev, udev); diff --git a/drivers/net/usb/zaurus.c b/drivers/net/usb/zaurus.c index 7984f2157d222..df3617c4c44e8 100644 --- a/drivers/net/usb/zaurus.c +++ b/drivers/net/usb/zaurus.c @@ -289,9 +289,23 @@ static const struct usb_device_id products [] = { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, + .idProduct = 0x8005, /* A-300 */ + ZAURUS_FAKE_INTERFACE, + .driver_info = (unsigned long)&bogus_mdlm_info, +}, { + .match_flags = USB_DEVICE_ID_MATCH_INT_INFO + | USB_DEVICE_ID_MATCH_DEVICE, + .idVendor = 0x04DD, .idProduct = 0x8006, /* B-500/SL-5600 */ ZAURUS_MASTER_INTERFACE, .driver_info = ZAURUS_PXA_INFO, +}, { + .match_flags = USB_DEVICE_ID_MATCH_INT_INFO + | USB_DEVICE_ID_MATCH_DEVICE, + .idVendor = 0x04DD, + .idProduct = 0x8006, /* B-500/SL-5600 */ + ZAURUS_FAKE_INTERFACE, + .driver_info = (unsigned long)&bogus_mdlm_info, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, @@ -299,6 +313,13 @@ static const struct usb_device_id products [] = { .idProduct = 0x8007, /* C-700 */ ZAURUS_MASTER_INTERFACE, .driver_info = ZAURUS_PXA_INFO, +}, { + .match_flags = USB_DEVICE_ID_MATCH_INT_INFO + | USB_DEVICE_ID_MATCH_DEVICE, + .idVendor = 0x04DD, + .idProduct = 0x8007, /* C-700 */ + ZAURUS_FAKE_INTERFACE, + .driver_info = (unsigned long)&bogus_mdlm_info, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index d58e9f818d3b7..258dcc1039216 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 00719e1304386..682733193d3de 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c index 68e88224b8b1f..ccedea7e8a50d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c @@ -128,12 +128,12 @@ mt7615_eeprom_parse_hw_band_cap(struct mt7615_dev *dev) case MT_EE_5GHZ: dev->mphy.cap.has_5ghz = true; break; - case MT_EE_2GHZ: - dev->mphy.cap.has_2ghz = true; - break; case MT_EE_DBDC: dev->dbdc_support = true; fallthrough; + case MT_EE_2GHZ: + dev->mphy.cap.has_2ghz = true; + break; default: dev->mphy.cap.has_2ghz = true; dev->mphy.cap.has_5ghz = true; diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 1d195429753dd..613eab7297046 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -716,7 +716,6 @@ struct qeth_card_info { u16 chid; u8 ids_valid:1; /* cssid,iid,chid */ u8 dev_addr_is_registered:1; - u8 open_when_online:1; u8 promisc_mode:1; u8 use_v1_blkt:1; u8 is_vm_nic:1; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 1d5b207c2b9e9..cd783290bde5e 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5373,8 +5373,6 @@ int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc, qeth_clear_ipacmd_list(card); rtnl_lock(); - card->info.open_when_online = card->dev->flags & IFF_UP; - dev_close(card->dev); netif_device_detach(card->dev); netif_carrier_off(card->dev); rtnl_unlock(); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 9f13ed170a437..75910c0bcc2bc 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -2388,9 +2388,12 @@ static int qeth_l2_set_online(struct qeth_card *card, bool carrier_ok) qeth_enable_hw_features(dev); qeth_l2_enable_brport_features(card); - if (card->info.open_when_online) { - card->info.open_when_online = 0; - dev_open(dev, NULL); + if (netif_running(dev)) { + local_bh_disable(); + napi_schedule(&card->napi); + /* kick-start the NAPI softirq: */ + local_bh_enable(); + qeth_l2_set_rx_mode(dev); } rtnl_unlock(); } diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index af4e60d2917e9..b92a32b4b1141 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2018,9 +2018,11 @@ static int qeth_l3_set_online(struct qeth_card *card, bool carrier_ok) netif_device_attach(dev); qeth_enable_hw_features(dev); - if (card->info.open_when_online) { - card->info.open_when_online = 0; - dev_open(dev, NULL); + if (netif_running(dev)) { + local_bh_disable(); + napi_schedule(&card->napi); + /* kick-start the NAPI softirq: */ + local_bh_enable(); } rtnl_unlock(); } diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index f21307537829b..4f0d0e55f0d46 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -534,8 +534,7 @@ static void zfcp_fc_adisc_handler(void *data) /* re-init to undo drop from zfcp_fc_adisc() */ port->d_id = ntoh24(adisc_resp->adisc_port_id); - /* port is good, unblock rport without going through erp */ - zfcp_scsi_schedule_rport_register(port); + /* port is still good, nothing to do */ out: atomic_andnot(ZFCP_STATUS_PORT_LINK_TEST, &port->status); put_device(&port->dev); @@ -595,9 +594,6 @@ void zfcp_fc_link_test_work(struct work_struct *work) int retval; set_worker_desc("zadisc%16llx", port->wwpn); /* < WORKER_DESC_LEN=24 */ - get_device(&port->dev); - port->rport_task = RPORT_DEL; - zfcp_scsi_rport_work(&port->rport_work); /* only issue one test command at one time per port */ if (atomic_read(&port->status) & ZFCP_STATUS_PORT_LINK_TEST) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 659196a2f63ad..4d72d82f73586 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -365,6 +365,7 @@ static void storvsc_on_channel_callback(void *context); #define STORVSC_FC_MAX_LUNS_PER_TARGET 255 #define STORVSC_FC_MAX_TARGETS 128 #define STORVSC_FC_MAX_CHANNELS 8 +#define STORVSC_FC_MAX_XFER_SIZE ((u32)(512 * 1024)) #define STORVSC_IDE_MAX_LUNS_PER_TARGET 64 #define STORVSC_IDE_MAX_TARGETS 1 @@ -2004,6 +2005,9 @@ static int storvsc_probe(struct hv_device *device, * protecting it from any weird value. */ max_xfer_bytes = round_down(stor_device->max_transfer_bytes, HV_HYP_PAGE_SIZE); + if (is_fc) + max_xfer_bytes = min(max_xfer_bytes, STORVSC_FC_MAX_XFER_SIZE); + /* max_hw_sectors_kb */ host->max_sectors = max_xfer_bytes >> 9; /* diff --git a/drivers/soc/imx/imx8mp-blk-ctrl.c b/drivers/soc/imx/imx8mp-blk-ctrl.c index 870aecc0202ae..1c1fcab4979a4 100644 --- a/drivers/soc/imx/imx8mp-blk-ctrl.c +++ b/drivers/soc/imx/imx8mp-blk-ctrl.c @@ -164,7 +164,7 @@ static int imx8mp_hsio_blk_ctrl_probe(struct imx8mp_blk_ctrl *bc) clk_hsio_pll->hw.init = &init; hw = &clk_hsio_pll->hw; - ret = devm_clk_hw_register(bc->dev, hw); + ret = devm_clk_hw_register(bc->bus_power_dev, hw); if (ret) return ret; diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 152b3ec911599..ad14dd745e4ae 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -499,12 +499,16 @@ static void fragment_free_space(struct btrfs_block_group *block_group) * used yet since their free space will be released as soon as the transaction * commits. */ -u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end) +int add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end, + u64 *total_added_ret) { struct btrfs_fs_info *info = block_group->fs_info; - u64 extent_start, extent_end, size, total_added = 0; + u64 extent_start, extent_end, size; int ret; + if (total_added_ret) + *total_added_ret = 0; + while (start < end) { ret = find_first_extent_bit(&info->excluded_extents, start, &extent_start, &extent_end, @@ -517,10 +521,12 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end start = extent_end + 1; } else if (extent_start > start && extent_start < end) { size = extent_start - start; - total_added += size; ret = btrfs_add_free_space_async_trimmed(block_group, start, size); - BUG_ON(ret); /* -ENOMEM or logic error */ + if (ret) + return ret; + if (total_added_ret) + *total_added_ret += size; start = extent_end + 1; } else { break; @@ -529,13 +535,15 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end if (start < end) { size = end - start; - total_added += size; ret = btrfs_add_free_space_async_trimmed(block_group, start, size); - BUG_ON(ret); /* -ENOMEM or logic error */ + if (ret) + return ret; + if (total_added_ret) + *total_added_ret += size; } - return total_added; + return 0; } /* @@ -779,8 +787,13 @@ next: if (key.type == BTRFS_EXTENT_ITEM_KEY || key.type == BTRFS_METADATA_ITEM_KEY) { - total_found += add_new_free_space(block_group, last, - key.objectid); + u64 space_added; + + ret = add_new_free_space(block_group, last, key.objectid, + &space_added); + if (ret) + goto out; + total_found += space_added; if (key.type == BTRFS_METADATA_ITEM_KEY) last = key.objectid + fs_info->nodesize; @@ -795,11 +808,10 @@ next: } path->slots[0]++; } - ret = 0; - - total_found += add_new_free_space(block_group, last, - block_group->start + block_group->length); + ret = add_new_free_space(block_group, last, + block_group->start + block_group->length, + NULL); out: btrfs_free_path(path); return ret; @@ -2290,9 +2302,11 @@ static int read_one_block_group(struct btrfs_fs_info *info, btrfs_free_excluded_extents(cache); } else if (cache->used == 0) { cache->cached = BTRFS_CACHE_FINISHED; - add_new_free_space(cache, cache->start, - cache->start + cache->length); + ret = add_new_free_space(cache, cache->start, + cache->start + cache->length, NULL); btrfs_free_excluded_extents(cache); + if (ret) + goto error; } ret = btrfs_add_block_group_cache(info, cache); @@ -2728,9 +2742,12 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran return ERR_PTR(ret); } - add_new_free_space(cache, chunk_offset, chunk_offset + size); - + ret = add_new_free_space(cache, chunk_offset, chunk_offset + size, NULL); btrfs_free_excluded_extents(cache); + if (ret) { + btrfs_put_block_group(cache); + return ERR_PTR(ret); + } /* * Ensure the corresponding space_info object is created and diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index cc0e4b37db2da..3195d0b0dbed8 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -277,8 +277,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait); void btrfs_put_caching_control(struct btrfs_caching_control *ctl); struct btrfs_caching_control *btrfs_get_caching_control( struct btrfs_block_group *cache); -u64 add_new_free_space(struct btrfs_block_group *block_group, - u64 start, u64 end); +int add_new_free_space(struct btrfs_block_group *block_group, + u64 start, u64 end, u64 *total_added_ret); struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( struct btrfs_fs_info *fs_info, const u64 chunk_offset); diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 045ddce32eca4..f169378e2ca6e 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1515,9 +1515,13 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, if (prev_bit == 0 && bit == 1) { extent_start = offset; } else if (prev_bit == 1 && bit == 0) { - total_found += add_new_free_space(block_group, - extent_start, - offset); + u64 space_added; + + ret = add_new_free_space(block_group, extent_start, + offset, &space_added); + if (ret) + goto out; + total_found += space_added; if (total_found > CACHING_CTL_WAKE_UP) { total_found = 0; wake_up(&caching_ctl->wait); @@ -1529,8 +1533,9 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, } } if (prev_bit == 1) { - total_found += add_new_free_space(block_group, extent_start, - end); + ret = add_new_free_space(block_group, extent_start, end, NULL); + if (ret) + goto out; extent_count++; } @@ -1569,6 +1574,8 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, end = block_group->start + block_group->length; while (1) { + u64 space_added; + ret = btrfs_next_item(root, path); if (ret < 0) goto out; @@ -1583,8 +1590,11 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY); ASSERT(key.objectid < end && key.objectid + key.offset <= end); - total_found += add_new_free_space(block_group, key.objectid, - key.objectid + key.offset); + ret = add_new_free_space(block_group, key.objectid, + key.objectid + key.offset, &space_added); + if (ret) + goto out; + total_found += space_added; if (total_found > CACHING_CTL_WAKE_UP) { total_found = 0; wake_up(&caching_ctl->wait); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 4c0f22acf53d2..83c4abff496da 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4762,7 +4762,7 @@ static void delayed_work(struct work_struct *work) dout("mdsc delayed_work\n"); - if (mdsc->stopping) + if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED) return; mutex_lock(&mdsc->mutex); @@ -4941,7 +4941,7 @@ void send_flush_mdlog(struct ceph_mds_session *s) void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) { dout("pre_umount\n"); - mdsc->stopping = 1; + mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN; ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true); ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 724307ff89cd9..86d2965e68a1f 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -380,6 +380,11 @@ struct cap_wait { int want; }; +enum { + CEPH_MDSC_STOPPING_BEGIN = 1, + CEPH_MDSC_STOPPING_FLUSHED = 2, +}; + /* * mds client state */ diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 3fc48b43cab0a..a5f52013314d6 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1374,6 +1374,16 @@ static void ceph_kill_sb(struct super_block *s) ceph_mdsc_pre_umount(fsc->mdsc); flush_fs_workqueues(fsc); + /* + * Though the kill_anon_super() will finally trigger the + * sync_filesystem() anyway, we still need to do it here + * and then bump the stage of shutdown to stop the work + * queue as earlier as possible. + */ + sync_filesystem(s); + + fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED; + kill_anon_super(s); fsc->client->extra_mon_dispatch = NULL; diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 4a1c238600c52..470988bb7867e 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1110,10 +1110,11 @@ static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be, struct z_erofs_bvec *bvec) { struct z_erofs_bvec_item *item; + unsigned int pgnr; - if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK)) { - unsigned int pgnr; - + if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK) && + (bvec->end == PAGE_SIZE || + bvec->offset + bvec->end == be->pcl->length)) { pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT; DBG_BUGON(pgnr >= be->nr_pages); if (!be->decompressed_pages[pgnr]) { diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c index 9f42f25fab920..e918decb37358 100644 --- a/fs/exfat/balloc.c +++ b/fs/exfat/balloc.c @@ -69,7 +69,7 @@ static int exfat_allocate_bitmap(struct super_block *sb, } sbi->map_sectors = ((need_map_size - 1) >> (sb->s_blocksize_bits)) + 1; - sbi->vol_amap = kmalloc_array(sbi->map_sectors, + sbi->vol_amap = kvmalloc_array(sbi->map_sectors, sizeof(struct buffer_head *), GFP_KERNEL); if (!sbi->vol_amap) return -ENOMEM; @@ -84,7 +84,7 @@ static int exfat_allocate_bitmap(struct super_block *sb, while (j < i) brelse(sbi->vol_amap[j++]); - kfree(sbi->vol_amap); + kvfree(sbi->vol_amap); sbi->vol_amap = NULL; return -EIO; } @@ -138,7 +138,7 @@ void exfat_free_bitmap(struct exfat_sb_info *sbi) for (i = 0; i < sbi->map_sectors; i++) __brelse(sbi->vol_amap[i]); - kfree(sbi->vol_amap); + kvfree(sbi->vol_amap); } int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 957574180a5e3..598081d0d0595 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -34,6 +34,7 @@ static int exfat_get_uniname_from_ext_entry(struct super_block *sb, { int i, err; struct exfat_entry_set_cache es; + unsigned int uni_len = 0, len; err = exfat_get_dentry_set(&es, sb, p_dir, entry, ES_ALL_ENTRIES); if (err) @@ -52,7 +53,10 @@ static int exfat_get_uniname_from_ext_entry(struct super_block *sb, if (exfat_get_entry_type(ep) != TYPE_EXTEND) break; - exfat_extract_uni_name(ep, uniname); + len = exfat_extract_uni_name(ep, uniname); + uni_len += len; + if (len != EXFAT_FILE_NAME_LEN || uni_len >= MAX_NAME_LENGTH) + break; uniname += EXFAT_FILE_NAME_LEN; } @@ -214,7 +218,10 @@ static void exfat_free_namebuf(struct exfat_dentry_namebuf *nb) exfat_init_namebuf(nb); } -/* skip iterating emit_dots when dir is empty */ +/* + * Before calling dir_emit*(), sbi->s_lock should be released + * because page fault can occur in dir_emit*(). + */ #define ITER_POS_FILLED_DOTS (2) static int exfat_iterate(struct file *file, struct dir_context *ctx) { @@ -229,11 +236,10 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx) int err = 0, fake_offset = 0; exfat_init_namebuf(nb); - mutex_lock(&EXFAT_SB(sb)->s_lock); cpos = ctx->pos; if (!dir_emit_dots(file, ctx)) - goto unlock; + goto out; if (ctx->pos == ITER_POS_FILLED_DOTS) { cpos = 0; @@ -245,16 +251,18 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx) /* name buffer should be allocated before use */ err = exfat_alloc_namebuf(nb); if (err) - goto unlock; + goto out; get_new: + mutex_lock(&EXFAT_SB(sb)->s_lock); + if (ei->flags == ALLOC_NO_FAT_CHAIN && cpos >= i_size_read(inode)) goto end_of_dir; err = exfat_readdir(inode, &cpos, &de); if (err) { /* - * At least we tried to read a sector. Move cpos to next sector - * position (should be aligned). + * At least we tried to read a sector. + * Move cpos to next sector position (should be aligned). */ if (err == -EIO) { cpos += 1 << (sb->s_blocksize_bits); @@ -277,16 +285,10 @@ get_new: inum = iunique(sb, EXFAT_ROOT_INO); } - /* - * Before calling dir_emit(), sb_lock should be released. - * Because page fault can occur in dir_emit() when the size - * of buffer given from user is larger than one page size. - */ mutex_unlock(&EXFAT_SB(sb)->s_lock); if (!dir_emit(ctx, nb->lfn, strlen(nb->lfn), inum, (de.attr & ATTR_SUBDIR) ? DT_DIR : DT_REG)) - goto out_unlocked; - mutex_lock(&EXFAT_SB(sb)->s_lock); + goto out; ctx->pos = cpos; goto get_new; @@ -294,9 +296,8 @@ end_of_dir: if (!cpos && fake_offset) cpos = ITER_POS_FILLED_DOTS; ctx->pos = cpos; -unlock: mutex_unlock(&EXFAT_SB(sb)->s_lock); -out_unlocked: +out: /* * To improve performance, free namebuf after unlock sb_lock. * If namebuf is not allocated, this function do nothing @@ -1079,7 +1080,8 @@ rewind: if (entry_type == TYPE_EXTEND) { unsigned short entry_uniname[16], unichar; - if (step != DIRENT_STEP_NAME) { + if (step != DIRENT_STEP_NAME || + name_len >= MAX_NAME_LENGTH) { step = DIRENT_STEP_FILE; continue; } diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 8244366862e4c..11572cc60d0e9 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -70,10 +70,7 @@ struct mb_cache; * second extended-fs super-block data in memory */ struct ext2_sb_info { - unsigned long s_frag_size; /* Size of a fragment in bytes */ - unsigned long s_frags_per_block;/* Number of fragments per block */ unsigned long s_inodes_per_block;/* Number of inodes per block */ - unsigned long s_frags_per_group;/* Number of fragments in a group */ unsigned long s_blocks_per_group;/* Number of blocks in a group */ unsigned long s_inodes_per_group;/* Number of inodes in a group */ unsigned long s_itb_per_group; /* Number of inode table blocks per group */ @@ -188,15 +185,6 @@ static inline struct ext2_sb_info *EXT2_SB(struct super_block *sb) #define EXT2_INODE_SIZE(s) (EXT2_SB(s)->s_inode_size) #define EXT2_FIRST_INO(s) (EXT2_SB(s)->s_first_ino) -/* - * Macro-instructions used to manage fragments - */ -#define EXT2_MIN_FRAG_SIZE 1024 -#define EXT2_MAX_FRAG_SIZE 4096 -#define EXT2_MIN_FRAG_LOG_SIZE 10 -#define EXT2_FRAG_SIZE(s) (EXT2_SB(s)->s_frag_size) -#define EXT2_FRAGS_PER_BLOCK(s) (EXT2_SB(s)->s_frags_per_block) - /* * Structure of a blocks group descriptor */ diff --git a/fs/ext2/super.c b/fs/ext2/super.c index f342f347a695f..2959afc7541c7 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -668,10 +668,9 @@ static int ext2_setup_super (struct super_block * sb, es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); le16_add_cpu(&es->s_mnt_count, 1); if (test_opt (sb, DEBUG)) - ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, " + ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, gc=%lu, " "bpg=%lu, ipg=%lu, mo=%04lx]", EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize, - sbi->s_frag_size, sbi->s_groups_count, EXT2_BLOCKS_PER_GROUP(sb), EXT2_INODES_PER_GROUP(sb), @@ -1012,14 +1011,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) } } - sbi->s_frag_size = EXT2_MIN_FRAG_SIZE << - le32_to_cpu(es->s_log_frag_size); - if (sbi->s_frag_size == 0) - goto cantfind_ext2; - sbi->s_frags_per_block = sb->s_blocksize / sbi->s_frag_size; - sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); - sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb); @@ -1045,11 +1037,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } - if (sb->s_blocksize != sbi->s_frag_size) { + if (es->s_log_frag_size != es->s_log_block_size) { ext2_msg(sb, KERN_ERR, - "error: fragsize %lu != blocksize %lu" - "(not supported yet)", - sbi->s_frag_size, sb->s_blocksize); + "error: fragsize log %u != blocksize log %u", + le32_to_cpu(es->s_log_frag_size), sb->s_blocksize_bits); goto failed_mount; } @@ -1066,12 +1057,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_blocks_per_group, sbi->s_inodes_per_group + 3); goto failed_mount; } - if (sbi->s_frags_per_group > sb->s_blocksize * 8) { - ext2_msg(sb, KERN_ERR, - "error: #fragments per group too big: %lu", - sbi->s_frags_per_group); - goto failed_mount; - } if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || sbi->s_inodes_per_group > sb->s_blocksize * 8) { ext2_msg(sb, KERN_ERR, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d867056a01f65..271d4e7b22c91 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3445,7 +3445,6 @@ static inline bool __is_valid_data_blkaddr(block_t blkaddr) * file.c */ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); -void f2fs_truncate_data_blocks(struct dnode_of_data *dn); int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock); int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock); int f2fs_truncate(struct inode *inode); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 015ed274dc312..ead75c4e833d2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -627,11 +627,6 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) dn->ofs_in_node, nr_free); } -void f2fs_truncate_data_blocks(struct dnode_of_data *dn) -{ - f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode)); -} - static int truncate_partial_data_page(struct inode *inode, u64 from, bool cache_only) { diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6bdb1bed29ec9..f8e1fd32e3e4f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -925,6 +925,7 @@ static int truncate_node(struct dnode_of_data *dn) static int truncate_dnode(struct dnode_of_data *dn) { + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct page *page; int err; @@ -932,16 +933,25 @@ static int truncate_dnode(struct dnode_of_data *dn) return 1; /* get direct node */ - page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid); + page = f2fs_get_node_page(sbi, dn->nid); if (PTR_ERR(page) == -ENOENT) return 1; else if (IS_ERR(page)) return PTR_ERR(page); + if (IS_INODE(page) || ino_of_node(page) != dn->inode->i_ino) { + f2fs_err(sbi, "incorrect node reference, ino: %lu, nid: %u, ino_of_node: %u", + dn->inode->i_ino, dn->nid, ino_of_node(page)); + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_handle_error(sbi, ERROR_INVALID_NODE_REFERENCE); + f2fs_put_page(page, 1); + return -EFSCORRUPTED; + } + /* Make dnode_of_data for parameter */ dn->node_page = page; dn->ofs_in_node = 0; - f2fs_truncate_data_blocks(dn); + f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode)); err = truncate_node(dn); if (err) { f2fs_put_page(page, 1); diff --git a/fs/file.c b/fs/file.c index 35c62b54c9d65..dbca26ef7a01a 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1036,12 +1036,28 @@ unsigned long __fdget_raw(unsigned int fd) return __fget_light(fd, 0); } +/* + * Try to avoid f_pos locking. We only need it if the + * file is marked for FMODE_ATOMIC_POS, and it can be + * accessed multiple ways. + * + * Always do it for directories, because pidfd_getfd() + * can make a file accessible even if it otherwise would + * not be, and for directories this is a correctness + * issue, not a "POSIX requirement". + */ +static inline bool file_needs_f_pos_lock(struct file *file) +{ + return (file->f_mode & FMODE_ATOMIC_POS) && + (file_count(file) > 1 || S_ISDIR(file_inode(file)->i_mode)); +} + unsigned long __fdget_pos(unsigned int fd) { unsigned long v = __fdget(fd); struct file *file = (struct file *)(v & ~3); - if (file && (file->f_mode & FMODE_ATOMIC_POS)) { + if (file && file_needs_f_pos_lock(file)) { v |= FDPUT_POS_UNLOCK; mutex_lock(&file->f_pos_lock); } diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c index c0c6bcbc8c05c..81c22df27c725 100644 --- a/fs/ntfs3/attrlist.c +++ b/fs/ntfs3/attrlist.c @@ -52,7 +52,7 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr) if (!attr->non_res) { lsize = le32_to_cpu(attr->res.data_size); - le = kmalloc(al_aligned(lsize), GFP_NOFS); + le = kmalloc(al_aligned(lsize), GFP_NOFS | __GFP_NOWARN); if (!le) { err = -ENOMEM; goto out; @@ -80,7 +80,7 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr) if (err < 0) goto out; - le = kmalloc(al_aligned(lsize), GFP_NOFS); + le = kmalloc(al_aligned(lsize), GFP_NOFS | __GFP_NOWARN); if (!le) { err = -ENOMEM; goto out; diff --git a/fs/open.c b/fs/open.c index 4478adcc4f3a0..15ab413d03458 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1271,7 +1271,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op) lookup_flags |= LOOKUP_IN_ROOT; if (how->resolve & RESOLVE_CACHED) { /* Don't bother even trying for create/truncate/tmpfile open */ - if (flags & (O_TRUNC | O_CREAT | O_TMPFILE)) + if (flags & (O_TRUNC | O_CREAT | __O_TMPFILE)) return -EAGAIN; lookup_flags |= LOOKUP_CACHED; } diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c index cf83617236d8b..a9410e976bb07 100644 --- a/fs/smb/client/dfs.c +++ b/fs/smb/client/dfs.c @@ -178,8 +178,12 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); rc = dfs_get_referral(mnt_ctx, ref_path + 1, NULL, &tl); - if (rc) + if (rc) { + rc = cifs_mount_get_tcon(mnt_ctx); + if (!rc) + rc = cifs_is_path_remote(mnt_ctx); break; + } tit = dfs_cache_get_tgt_iterator(&tl); if (!tit) { diff --git a/fs/super.c b/fs/super.c index 04bc62ab7dfea..5c72c59c5153b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -903,6 +903,7 @@ int reconfigure_super(struct fs_context *fc) struct super_block *sb = fc->root->d_sb; int retval; bool remount_ro = false; + bool remount_rw = false; bool force = fc->sb_flags & SB_FORCE; if (fc->sb_flags_mask & ~MS_RMT_MASK) @@ -920,7 +921,7 @@ int reconfigure_super(struct fs_context *fc) bdev_read_only(sb->s_bdev)) return -EACCES; #endif - + remount_rw = !(fc->sb_flags & SB_RDONLY) && sb_rdonly(sb); remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb); } @@ -950,6 +951,14 @@ int reconfigure_super(struct fs_context *fc) if (retval) return retval; } + } else if (remount_rw) { + /* + * We set s_readonly_remount here to protect filesystem's + * reconfigure code from writes from userspace until + * reconfigure finishes. + */ + sb->s_readonly_remount = 1; + smp_wmb(); } if (fc->ops->reconfigure) { diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index b22764fe669c8..58d7f43a13712 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -145,6 +145,10 @@ static int alloc_branch(struct inode *inode, */ parent = block_to_cpu(SYSV_SB(inode->i_sb), branch[n-1].key); bh = sb_getblk(inode->i_sb, parent); + if (!bh) { + sysv_free_block(inode->i_sb, branch[n].key); + break; + } lock_buffer(bh); memset(bh->b_data, 0, blocksize); branch[n].bh = bh; diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h index 20c93f08c9933..95a1d214108a5 100644 --- a/include/asm-generic/word-at-a-time.h +++ b/include/asm-generic/word-at-a-time.h @@ -38,7 +38,7 @@ static inline long find_zero(unsigned long mask) return (mask >> 8) ? byte : byte + 1; } -static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) +static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) { unsigned long rhs = val | c->low_bits; *data = rhs; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 1d6402529d10c..a82a4bb6ce68b 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -103,6 +103,7 @@ enum f2fs_error { ERROR_INCONSISTENT_SIT, ERROR_CORRUPTED_VERITY_XATTR, ERROR_CORRUPTED_XATTR, + ERROR_INVALID_NODE_REFERENCE, ERROR_MAX, }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 68adc8af29efb..9291c04a2e09d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4827,13 +4827,6 @@ int skb_crc32c_csum_help(struct sk_buff *skb); int skb_csum_hwoffload_help(struct sk_buff *skb, const netdev_features_t features); -struct sk_buff *__skb_gso_segment(struct sk_buff *skb, - netdev_features_t features, bool tx_path); -struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb, - netdev_features_t features, __be16 type); -struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, - netdev_features_t features); - struct netdev_bonding_info { ifslave slave; ifbond master; @@ -4856,11 +4849,6 @@ static inline void ethtool_notify(struct net_device *dev, unsigned int cmd, } #endif -static inline -struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) -{ - return __skb_gso_segment(skb, features, true); -} __be16 skb_network_protocol(struct sk_buff *skb, int *depth); static inline bool can_checksum_protocol(netdev_features_t features, @@ -4987,6 +4975,7 @@ netdev_features_t passthru_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); netdev_features_t netif_skb_features(struct sk_buff *skb); +void skb_warn_bad_offload(const struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) { @@ -5035,19 +5024,6 @@ void netif_set_tso_max_segs(struct net_device *dev, unsigned int segs); void netif_inherit_tso_max(struct net_device *to, const struct net_device *from); -static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, - int pulled_hlen, u16 mac_offset, - int mac_len) -{ - skb->protocol = protocol; - skb->encapsulation = 1; - skb_push(skb, pulled_hlen); - skb_reset_transport_header(skb); - skb->mac_header = mac_offset; - skb->network_header = skb->mac_header + mac_len; - skb->mac_len = mac_len; -} - static inline bool netif_is_macsec(const struct net_device *dev) { return dev->priv_flags & IFF_MACSEC; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0b40417457cd1..fdd9db2612968 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3992,8 +3992,6 @@ int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); -bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu); -bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features, unsigned int offset); @@ -4859,75 +4857,6 @@ static inline struct sec_path *skb_sec_path(const struct sk_buff *skb) #endif } -/* Keeps track of mac header offset relative to skb->head. - * It is useful for TSO of Tunneling protocol. e.g. GRE. - * For non-tunnel skb it points to skb_mac_header() and for - * tunnel skb it points to outer mac header. - * Keeps track of level of encapsulation of network headers. - */ -struct skb_gso_cb { - union { - int mac_offset; - int data_offset; - }; - int encap_level; - __wsum csum; - __u16 csum_start; -}; -#define SKB_GSO_CB_OFFSET 32 -#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_GSO_CB_OFFSET)) - -static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) -{ - return (skb_mac_header(inner_skb) - inner_skb->head) - - SKB_GSO_CB(inner_skb)->mac_offset; -} - -static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) -{ - int new_headroom, headroom; - int ret; - - headroom = skb_headroom(skb); - ret = pskb_expand_head(skb, extra, 0, GFP_ATOMIC); - if (ret) - return ret; - - new_headroom = skb_headroom(skb); - SKB_GSO_CB(skb)->mac_offset += (new_headroom - headroom); - return 0; -} - -static inline void gso_reset_checksum(struct sk_buff *skb, __wsum res) -{ - /* Do not update partial checksums if remote checksum is enabled. */ - if (skb->remcsum_offload) - return; - - SKB_GSO_CB(skb)->csum = res; - SKB_GSO_CB(skb)->csum_start = skb_checksum_start(skb) - skb->head; -} - -/* Compute the checksum for a gso segment. First compute the checksum value - * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and - * then add in skb->csum (checksum from csum_start to end of packet). - * skb->csum and csum_start are then updated to reflect the checksum of the - * resultant packet starting from the transport header-- the resultant checksum - * is in the res argument (i.e. normally zero or ~ of checksum of a pseudo - * header. - */ -static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res) -{ - unsigned char *csum_start = skb_transport_header(skb); - int plen = (skb->head + SKB_GSO_CB(skb)->csum_start) - csum_start; - __wsum partial = SKB_GSO_CB(skb)->csum; - - SKB_GSO_CB(skb)->csum = res; - SKB_GSO_CB(skb)->csum_start = csum_start - skb->head; - - return csum_fold(csum_partial(csum_start, plen, partial)); -} - static inline bool skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 8e984d75f5b6c..6b0a7dc48a4b7 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -101,6 +101,7 @@ struct spi_mem_op { u8 nbytes; u8 buswidth; u8 dtr : 1; + u8 __pad : 7; u16 opcode; } cmd; @@ -108,6 +109,7 @@ struct spi_mem_op { u8 nbytes; u8 buswidth; u8 dtr : 1; + u8 __pad : 7; u64 val; } addr; @@ -115,12 +117,14 @@ struct spi_mem_op { u8 nbytes; u8 buswidth; u8 dtr : 1; + u8 __pad : 7; } dummy; struct { u8 buswidth; u8 dtr : 1; u8 ecc : 1; + u8 __pad : 6; enum spi_mem_data_dir dir; unsigned int nbytes; union { diff --git a/include/net/gro.h b/include/net/gro.h index a4fab706240d2..d3d318e7d917b 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -446,5 +446,49 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, gro_normal_list(napi); } +/* This function is the alternative of 'inet_iif' and 'inet_sdif' + * functions in case we can not rely on fields of IPCB. + * + * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized. + * The caller must hold the RCU read lock. + */ +static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif) +{ + *iif = inet_iif(skb) ?: skb->dev->ifindex; + *sdif = 0; + +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (netif_is_l3_slave(skb->dev)) { + struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev); + + *sdif = *iif; + *iif = master ? master->ifindex : 0; + } +#endif +} + +/* This function is the alternative of 'inet6_iif' and 'inet6_sdif' + * functions in case we can not rely on fields of IP6CB. + * + * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized. + * The caller must hold the RCU read lock. + */ +static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif) +{ + /* using skb->dev->ifindex because skb_dst(skb) is not initialized */ + *iif = skb->dev->ifindex; + *sdif = 0; + +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (netif_is_l3_slave(skb->dev)) { + struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev); + + *sdif = *iif; + *iif = master ? master->ifindex : 0; + } +#endif +} + +extern struct list_head offload_base; #endif /* _NET_IPV6_GRO_H */ diff --git a/include/net/gso.h b/include/net/gso.h new file mode 100644 index 0000000000000..29975440cad51 --- /dev/null +++ b/include/net/gso.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _NET_GSO_H +#define _NET_GSO_H + +#include + +/* Keeps track of mac header offset relative to skb->head. + * It is useful for TSO of Tunneling protocol. e.g. GRE. + * For non-tunnel skb it points to skb_mac_header() and for + * tunnel skb it points to outer mac header. + * Keeps track of level of encapsulation of network headers. + */ +struct skb_gso_cb { + union { + int mac_offset; + int data_offset; + }; + int encap_level; + __wsum csum; + __u16 csum_start; +}; +#define SKB_GSO_CB_OFFSET 32 +#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_GSO_CB_OFFSET)) + +static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) +{ + return (skb_mac_header(inner_skb) - inner_skb->head) - + SKB_GSO_CB(inner_skb)->mac_offset; +} + +static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) +{ + int new_headroom, headroom; + int ret; + + headroom = skb_headroom(skb); + ret = pskb_expand_head(skb, extra, 0, GFP_ATOMIC); + if (ret) + return ret; + + new_headroom = skb_headroom(skb); + SKB_GSO_CB(skb)->mac_offset += (new_headroom - headroom); + return 0; +} + +static inline void gso_reset_checksum(struct sk_buff *skb, __wsum res) +{ + /* Do not update partial checksums if remote checksum is enabled. */ + if (skb->remcsum_offload) + return; + + SKB_GSO_CB(skb)->csum = res; + SKB_GSO_CB(skb)->csum_start = skb_checksum_start(skb) - skb->head; +} + +/* Compute the checksum for a gso segment. First compute the checksum value + * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and + * then add in skb->csum (checksum from csum_start to end of packet). + * skb->csum and csum_start are then updated to reflect the checksum of the + * resultant packet starting from the transport header-- the resultant checksum + * is in the res argument (i.e. normally zero or ~ of checksum of a pseudo + * header. + */ +static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res) +{ + unsigned char *csum_start = skb_transport_header(skb); + int plen = (skb->head + SKB_GSO_CB(skb)->csum_start) - csum_start; + __wsum partial = SKB_GSO_CB(skb)->csum; + + SKB_GSO_CB(skb)->csum = res; + SKB_GSO_CB(skb)->csum_start = csum_start - skb->head; + + return csum_fold(csum_partial(csum_start, plen, partial)); +} + +struct sk_buff *__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, bool tx_path); + +static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + return __skb_gso_segment(skb, features, true); +} + +struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb, + netdev_features_t features, __be16 type); + +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features); + +bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu); + +bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len); + +static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, + int pulled_hlen, u16 mac_offset, + int mac_len) +{ + skb->protocol = protocol; + skb->encapsulation = 1; + skb_push(skb, pulled_hlen); + skb_reset_transport_header(skb); + skb->mac_header = mac_offset; + skb->network_header = skb->mac_header + mac_len; + skb->mac_len = mac_len; +} + +#endif /* _NET_GSO_H */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index caa20a9055310..0bb32bfc61832 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -107,11 +107,12 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) { - if (!sk->sk_mark && - READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)) + u32 mark = READ_ONCE(sk->sk_mark); + + if (!mark && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)) return skb->mark; - return sk->sk_mark; + return mark; } static inline int inet_request_bound_dev_if(const struct sock *sk, diff --git a/include/net/ip.h b/include/net/ip.h index 83a1a9bc3ceb1..530e7257e4389 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -93,7 +93,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, { ipcm_init(ipcm); - ipcm->sockc.mark = inet->sk.sk_mark; + ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark); ipcm->sockc.tsflags = inet->sk.sk_tsflags; ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr; diff --git a/include/net/route.h b/include/net/route.h index bcc367cf3aa2d..9ca0f72868b76 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -168,7 +168,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi __be16 dport, __be16 sport, __u8 proto, __u8 tos, int oif) { - flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, + flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, daddr, saddr, dport, sport, sock_net_uid(net, sk)); @@ -301,7 +301,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, if (inet_sk(sk)->transparent) flow_flags |= FLOWI_FLAG_ANYSRC; - flowi4_init_output(fl4, oif, sk->sk_mark, ip_sock_rt_tos(sk), + flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), protocol, flow_flags, dst, src, dport, sport, sk->sk_uid); } diff --git a/include/net/udp.h b/include/net/udp.h index de4b528522bb9..94f3486c43e33 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/include/net/vxlan.h b/include/net/vxlan.h index b57567296bc67..fae2893613aa2 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -554,12 +554,12 @@ static inline void vxlan_flag_attr_error(int attrtype, } static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh, - int hash, + u32 hash, struct vxlan_rdst *rdst) { struct fib_nh_common *nhc; - nhc = nexthop_path_fdb_result(nh, hash); + nhc = nexthop_path_fdb_result(nh, hash >> 1); if (unlikely(!nhc)) return false; diff --git a/io_uring/timeout.c b/io_uring/timeout.c index fc950177e2e1d..350eb830b4855 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -594,7 +594,7 @@ int io_timeout(struct io_kiocb *req, unsigned int issue_flags) goto add; } - tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); + tail = data_race(ctx->cached_cq_tail) - atomic_read(&ctx->cq_timeouts); timeout->target_seq = tail + off; /* Update the last seq here in case io_flush_timeouts() hasn't. diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c index 540331b610a97..addf3dd57b59b 100644 --- a/kernel/bpf/bloom_filter.c +++ b/kernel/bpf/bloom_filter.c @@ -86,9 +86,6 @@ static struct bpf_map *bloom_map_alloc(union bpf_attr *attr) int numa_node = bpf_map_attr_numa_node(attr); struct bpf_bloom_filter *bloom; - if (!bpf_capable()) - return ERR_PTR(-EPERM); - if (attr->key_size != 0 || attr->value_size == 0 || attr->max_entries == 0 || attr->map_flags & ~BLOOM_CREATE_FLAG_MASK || diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 47d9948d768f0..b5149cfce7d4d 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -723,9 +723,6 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr) !attr->btf_key_type_id || !attr->btf_value_type_id) return -EINVAL; - if (!bpf_capable()) - return -EPERM; - if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE) return -E2BIG; diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index d3f0a4825fa61..116a0ce378ecd 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -655,9 +655,6 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) const struct btf_type *t, *vt; struct bpf_map *map; - if (!bpf_capable()) - return ERR_PTR(-EPERM); - st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id); if (!st_ops) return ERR_PTR(-ENOTSUPP); diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 3da63be602d1c..286ab3db0fde8 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include @@ -74,6 +74,7 @@ struct bpf_cpu_map_entry { struct rcu_head rcu; struct work_struct kthread_stop_wq; + struct completion kthread_running; }; struct bpf_cpu_map { @@ -89,9 +90,6 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) u32 value_size = attr->value_size; struct bpf_cpu_map *cmap; - if (!bpf_capable()) - return ERR_PTR(-EPERM); - /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || (value_size != offsetofend(struct bpf_cpumap_val, qsize) && @@ -133,11 +131,17 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring) * invoked cpu_map_kthread_stop(). Catch any broken behaviour * gracefully and warn once. */ - struct xdp_frame *xdpf; + void *ptr; - while ((xdpf = ptr_ring_consume(ring))) - if (WARN_ON_ONCE(xdpf)) - xdp_return_frame(xdpf); + while ((ptr = ptr_ring_consume(ring))) { + WARN_ON_ONCE(1); + if (unlikely(__ptr_test_bit(0, &ptr))) { + __ptr_clear_bit(0, &ptr); + kfree_skb(ptr); + continue; + } + xdp_return_frame(ptr); + } } static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) @@ -157,7 +161,6 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) static void cpu_map_kthread_stop(struct work_struct *work) { struct bpf_cpu_map_entry *rcpu; - int err; rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq); @@ -167,14 +170,7 @@ static void cpu_map_kthread_stop(struct work_struct *work) rcu_barrier(); /* kthread_stop will wake_up_process and wait for it to complete */ - err = kthread_stop(rcpu->kthread); - if (err) { - /* kthread_stop may be called before cpu_map_kthread_run - * is executed, so we need to release the memory related - * to rcpu. - */ - put_cpu_map_entry(rcpu); - } + kthread_stop(rcpu->kthread); } static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu, @@ -302,11 +298,11 @@ static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames, return nframes; } - static int cpu_map_kthread_run(void *data) { struct bpf_cpu_map_entry *rcpu = data; + complete(&rcpu->kthread_running); set_current_state(TASK_INTERRUPTIBLE); /* When kthread gives stop order, then rcpu have been disconnected @@ -471,6 +467,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value, goto free_ptr_ring; /* Setup kthread */ + init_completion(&rcpu->kthread_running); rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa, "cpumap/%d/map:%d", cpu, map->id); @@ -484,6 +481,12 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value, kthread_bind(rcpu->kthread, cpu); wake_up_process(rcpu->kthread); + /* Make sure kthread has been running, so kthread_stop() will not + * stop the kthread prematurely and all pending frames or skbs + * will be handled by the kthread before kthread_stop() returns. + */ + wait_for_completion(&rcpu->kthread_running); + return rcpu; free_prog: diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 802692fa3905c..49cc0b5671c61 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -160,9 +160,6 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) struct bpf_dtab *dtab; int err; - if (!capable(CAP_NET_ADMIN)) - return ERR_PTR(-EPERM); - dtab = bpf_map_area_alloc(sizeof(*dtab), NUMA_NO_NODE); if (!dtab) return ERR_PTR(-ENOMEM); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 9901efee4339d..56d3da7d0bc66 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -422,12 +422,6 @@ static int htab_map_alloc_check(union bpf_attr *attr) BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) != offsetof(struct htab_elem, hash_node.pprev)); - if (lru && !bpf_capable()) - /* LRU implementation is much complicated than other - * maps. Hence, limit to CAP_BPF. - */ - return -EPERM; - if (zero_seed && !capable(CAP_SYS_ADMIN)) /* Guard against local DoS, and discourage production use. */ return -EPERM; diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index e0d3ddf2037ab..17c7e7782a1f7 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -544,9 +544,6 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) { struct lpm_trie *trie; - if (!bpf_capable()) - return ERR_PTR(-EPERM); - /* check sanity of attributes */ if (attr->max_entries == 0 || !(attr->map_flags & BPF_F_NO_PREALLOC) || diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index 601609164ef34..8d2ddcb7566b7 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include "percpu_freelist.h" @@ -46,9 +45,6 @@ static bool queue_stack_map_is_full(struct bpf_queue_stack *qs) /* Called from syscall */ static int queue_stack_map_alloc_check(union bpf_attr *attr) { - if (!bpf_capable()) - return -EPERM; - /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 0 || attr->value_size == 0 || diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index cbf2d8d784b89..4b4f9670f1a9a 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -151,9 +151,6 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr) int numa_node = bpf_map_attr_numa_node(attr); struct reuseport_array *array; - if (!bpf_capable()) - return ERR_PTR(-EPERM); - /* allocate all map elements and zero-initialize them */ array = bpf_map_area_alloc(struct_size(array, ptrs, attr->max_entries), numa_node); if (!array) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index b25fce425b2c6..458bb80b14d57 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -74,9 +74,6 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) u64 cost, n_buckets; int err; - if (!bpf_capable()) - return ERR_PTR(-EPERM); - if (attr->map_flags & ~STACK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5524fcf6fb2a4..f715ec5d541ad 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -109,37 +109,6 @@ const struct bpf_map_ops bpf_map_offload_ops = { .map_mem_usage = bpf_map_offload_map_mem_usage, }; -static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) -{ - const struct bpf_map_ops *ops; - u32 type = attr->map_type; - struct bpf_map *map; - int err; - - if (type >= ARRAY_SIZE(bpf_map_types)) - return ERR_PTR(-EINVAL); - type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types)); - ops = bpf_map_types[type]; - if (!ops) - return ERR_PTR(-EINVAL); - - if (ops->map_alloc_check) { - err = ops->map_alloc_check(attr); - if (err) - return ERR_PTR(err); - } - if (attr->map_ifindex) - ops = &bpf_map_offload_ops; - if (!ops->map_mem_usage) - return ERR_PTR(-EINVAL); - map = ops->map_alloc(attr); - if (IS_ERR(map)) - return map; - map->ops = ops; - map->map_type = type; - return map; -} - static void bpf_map_write_active_inc(struct bpf_map *map) { atomic64_inc(&map->writecnt); @@ -1127,7 +1096,9 @@ free_map_tab: /* called via syscall */ static int map_create(union bpf_attr *attr) { + const struct bpf_map_ops *ops; int numa_node = bpf_map_attr_numa_node(attr); + u32 map_type = attr->map_type; struct bpf_map *map; int f_flags; int err; @@ -1158,9 +1129,85 @@ static int map_create(union bpf_attr *attr) return -EINVAL; /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ - map = find_and_alloc_map(attr); + map_type = attr->map_type; + if (map_type >= ARRAY_SIZE(bpf_map_types)) + return -EINVAL; + map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types)); + ops = bpf_map_types[map_type]; + if (!ops) + return -EINVAL; + + if (ops->map_alloc_check) { + err = ops->map_alloc_check(attr); + if (err) + return err; + } + if (attr->map_ifindex) + ops = &bpf_map_offload_ops; + if (!ops->map_mem_usage) + return -EINVAL; + + /* Intent here is for unprivileged_bpf_disabled to block BPF map + * creation for unprivileged users; other actions depend + * on fd availability and access to bpffs, so are dependent on + * object creation success. Even with unprivileged BPF disabled, + * capability checks are still carried out. + */ + if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) + return -EPERM; + + /* check privileged map type permissions */ + switch (map_type) { + case BPF_MAP_TYPE_ARRAY: + case BPF_MAP_TYPE_PERCPU_ARRAY: + case BPF_MAP_TYPE_PROG_ARRAY: + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + case BPF_MAP_TYPE_CGROUP_ARRAY: + case BPF_MAP_TYPE_ARRAY_OF_MAPS: + case BPF_MAP_TYPE_HASH: + case BPF_MAP_TYPE_PERCPU_HASH: + case BPF_MAP_TYPE_HASH_OF_MAPS: + case BPF_MAP_TYPE_RINGBUF: + case BPF_MAP_TYPE_USER_RINGBUF: + case BPF_MAP_TYPE_CGROUP_STORAGE: + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: + /* unprivileged */ + break; + case BPF_MAP_TYPE_SK_STORAGE: + case BPF_MAP_TYPE_INODE_STORAGE: + case BPF_MAP_TYPE_TASK_STORAGE: + case BPF_MAP_TYPE_CGRP_STORAGE: + case BPF_MAP_TYPE_BLOOM_FILTER: + case BPF_MAP_TYPE_LPM_TRIE: + case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: + case BPF_MAP_TYPE_STACK_TRACE: + case BPF_MAP_TYPE_QUEUE: + case BPF_MAP_TYPE_STACK: + case BPF_MAP_TYPE_LRU_HASH: + case BPF_MAP_TYPE_LRU_PERCPU_HASH: + case BPF_MAP_TYPE_STRUCT_OPS: + case BPF_MAP_TYPE_CPUMAP: + if (!bpf_capable()) + return -EPERM; + break; + case BPF_MAP_TYPE_SOCKMAP: + case BPF_MAP_TYPE_SOCKHASH: + case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: + case BPF_MAP_TYPE_XSKMAP: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + break; + default: + WARN(1, "unsupported map type %d", map_type); + return -EPERM; + } + + map = ops->map_alloc(attr); if (IS_ERR(map)) return PTR_ERR(map); + map->ops = ops; + map->map_type = map_type; err = bpf_obj_name_cpy(map->name, attr->map_name, sizeof(attr->map_name)); @@ -2535,6 +2582,16 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) /* eBPF programs must be GPL compatible to use GPL-ed functions */ is_gpl = license_is_gpl_compatible(license); + /* Intent here is for unprivileged_bpf_disabled to block BPF program + * creation for unprivileged users; other actions depend + * on fd availability and access to bpffs, so are dependent on + * object creation success. Even with unprivileged BPF disabled, + * capability checks are still carried out for these + * and other operations. + */ + if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) + return -EPERM; + if (attr->insn_cnt == 0 || attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) return -E2BIG; @@ -5018,23 +5075,8 @@ out_prog_put: static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) { union bpf_attr attr; - bool capable; int err; - capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled; - - /* Intent here is for unprivileged_bpf_disabled to block key object - * creation commands for unprivileged users; other actions depend - * of fd availability and access to bpffs, so are dependent on - * object creation success. Capabilities are later verified for - * operations such as load and map create, so even with unprivileged - * BPF disabled, capability checks are still carried out for these - * and other operations. - */ - if (!capable && - (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD)) - return -EPERM; - err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); if (err) return err; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 1f4b07da327a6..a53524f3f7d82 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -661,8 +661,7 @@ static DEFINE_PER_CPU(int, bpf_trace_nest_level); BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, u64, flags, void *, data, u64, size) { - struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds); - int nest_level = this_cpu_inc_return(bpf_trace_nest_level); + struct bpf_trace_sample_data *sds; struct perf_raw_record raw = { .frag = { .size = size, @@ -670,7 +669,11 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, }, }; struct perf_sample_data *sd; - int err; + int nest_level, err; + + preempt_disable(); + sds = this_cpu_ptr(&bpf_trace_sds); + nest_level = this_cpu_inc_return(bpf_trace_nest_level); if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) { err = -EBUSY; @@ -688,9 +691,9 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, perf_sample_save_raw_data(sd, &raw); err = __bpf_perf_event_output(regs, map, flags, sd); - out: this_cpu_dec(bpf_trace_nest_level); + preempt_enable(); return err; } @@ -715,7 +718,6 @@ static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds); u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) { - int nest_level = this_cpu_inc_return(bpf_event_output_nest_level); struct perf_raw_frag frag = { .copy = ctx_copy, .size = ctx_size, @@ -732,8 +734,12 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, }; struct perf_sample_data *sd; struct pt_regs *regs; + int nest_level; u64 ret; + preempt_disable(); + nest_level = this_cpu_inc_return(bpf_event_output_nest_level); + if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) { ret = -EBUSY; goto out; @@ -748,6 +754,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, ret = __bpf_perf_event_output(regs, map, flags, sd); out: this_cpu_dec(bpf_event_output_nest_level); + preempt_enable(); return ret; } diff --git a/lib/Makefile b/lib/Makefile index 876fcdeae34ec..05d8ec332baac 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -82,7 +82,13 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o obj-$(CONFIG_TEST_DYNAMIC_DEBUG) += test_dynamic_debug.o obj-$(CONFIG_TEST_PRINTF) += test_printf.o obj-$(CONFIG_TEST_SCANF) += test_scanf.o + obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o +ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_KASAN),yy) +# FIXME: Clang breaks test_bitmap_const_eval when KASAN and GCOV are enabled +GCOV_PROFILE_test_bitmap.o := n +endif + obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_XARRAY) += test_xarray.o obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 984985c39c9b0..a517256a270b7 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -498,6 +498,15 @@ static void debug_print_object(struct debug_obj *obj, char *msg) const struct debug_obj_descr *descr = obj->descr; static int limit; + /* + * Don't report if lookup_object_or_alloc() by the current thread + * failed because lookup_object_or_alloc()/debug_objects_oom() by a + * concurrent thread turned off debug_objects_enabled and cleared + * the hash buckets. + */ + if (!debug_objects_enabled) + return; + if (limit < 5 && descr != descr_test) { void *hint = descr->debug_hint ? descr->debug_hint(obj->object) : NULL; diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index a8005ad3bd589..37a9108c4f588 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -1149,6 +1149,10 @@ static void __init test_bitmap_print_buf(void) } } +/* + * FIXME: Clang breaks compile-time evaluations when KASAN and GCOV are enabled. + * To workaround it, GCOV is force-disabled in Makefile for this configuration. + */ static void __init test_bitmap_const_eval(void) { DECLARE_BITMAP(bitmap, BITS_PER_LONG); @@ -1174,11 +1178,7 @@ static void __init test_bitmap_const_eval(void) * the compiler is fixed. */ bitmap_clear(bitmap, 0, BITS_PER_LONG); -#if defined(__s390__) && defined(__clang__) - if (!const_test_bit(7, bitmap)) -#else if (!test_bit(7, bitmap)) -#endif bitmap_set(bitmap, 5, 2); /* Equals to `unsigned long bitopvar = BIT(20)` */ diff --git a/mm/filemap.c b/mm/filemap.c index 8abce63b259c9..a2006936a6ae2 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1760,9 +1760,7 @@ bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm, * * Return: The index of the gap if found, otherwise an index outside the * range specified (in which case 'return - index >= max_scan' will be true). - * In the rare case of index wrap-around, 0 will be returned. 0 will also - * be returned if index == 0 and there is a gap at the index. We can not - * wrap-around if passed index == 0. + * In the rare case of index wrap-around, 0 will be returned. */ pgoff_t page_cache_next_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan) @@ -1772,13 +1770,12 @@ pgoff_t page_cache_next_miss(struct address_space *mapping, while (max_scan--) { void *entry = xas_next(&xas); if (!entry || xa_is_value(entry)) - return xas.xa_index; - if (xas.xa_index == 0 && index != 0) - return xas.xa_index; + break; + if (xas.xa_index == 0) + break; } - /* No gaps in range and no wrap-around, return index beyond range */ - return xas.xa_index + 1; + return xas.xa_index; } EXPORT_SYMBOL(page_cache_next_miss); @@ -1799,9 +1796,7 @@ EXPORT_SYMBOL(page_cache_next_miss); * * Return: The index of the gap if found, otherwise an index outside the * range specified (in which case 'index - return >= max_scan' will be true). - * In the rare case of wrap-around, ULONG_MAX will be returned. ULONG_MAX - * will also be returned if index == ULONG_MAX and there is a gap at the - * index. We can not wrap-around if passed index == ULONG_MAX. + * In the rare case of wrap-around, ULONG_MAX will be returned. */ pgoff_t page_cache_prev_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan) @@ -1811,13 +1806,12 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, while (max_scan--) { void *entry = xas_prev(&xas); if (!entry || xa_is_value(entry)) - return xas.xa_index; - if (xas.xa_index == ULONG_MAX && index != ULONG_MAX) - return xas.xa_index; + break; + if (xas.xa_index == ULONG_MAX) + break; } - /* No gaps in range and no wrap-around, return index beyond range */ - return xas.xa_index - 1; + return xas.xa_index; } EXPORT_SYMBOL(page_cache_prev_miss); diff --git a/mm/gup.c b/mm/gup.c index 94102390b273a..e3e6c473bbc16 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2977,7 +2977,7 @@ static int internal_get_user_pages_fast(unsigned long start, start = untagged_addr(start) & PAGE_MASK; len = nr_pages << PAGE_SHIFT; if (check_add_overflow(start, len, &end)) - return 0; + return -EOVERFLOW; if (end > TASK_SIZE_MAX) return -EFAULT; if (unlikely(!access_ok((void __user *)start, len))) diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index f9cb5af9894c6..4d837ab83f083 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -489,7 +489,7 @@ static void __kasan_record_aux_stack(void *addr, bool can_alloc) return; alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0]; - alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT, can_alloc); + alloc_meta->aux_stack[0] = kasan_save_stack(0, can_alloc); } void kasan_record_aux_stack(void *addr) @@ -519,7 +519,7 @@ void kasan_save_free_info(struct kmem_cache *cache, void *object) if (!free_meta) return; - kasan_set_track(&free_meta->free_track, GFP_NOWAIT); + kasan_set_track(&free_meta->free_track, 0); /* The object was freed and has free track set. */ *(u8 *)kasan_mem_to_shadow(object) = KASAN_SLAB_FREETRACK; } diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c index 67a222586846e..7dcfe341d48e3 100644 --- a/mm/kasan/tags.c +++ b/mm/kasan/tags.c @@ -140,5 +140,5 @@ void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags) void kasan_save_free_info(struct kmem_cache *cache, void *object) { - save_stack_info(cache, object, GFP_NOWAIT, true); + save_stack_info(cache, object, 0, true); } diff --git a/mm/kmsan/core.c b/mm/kmsan/core.c index 7d1e4aa30bae6..3adb4c1d3b193 100644 --- a/mm/kmsan/core.c +++ b/mm/kmsan/core.c @@ -74,7 +74,7 @@ depot_stack_handle_t kmsan_save_stack_with_flags(gfp_t flags, nr_entries = stack_trace_save(entries, KMSAN_STACK_DEPTH, 0); /* Don't sleep. */ - flags &= ~__GFP_DIRECT_RECLAIM; + flags &= ~(__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM); handle = __stack_depot_save(entries, nr_entries, flags, true); return stack_depot_set_extra_bits(handle, extra); @@ -245,7 +245,7 @@ depot_stack_handle_t kmsan_internal_chain_origin(depot_stack_handle_t id) extra_bits = kmsan_extra_bits(depth, uaf); entries[0] = KMSAN_CHAIN_MAGIC_ORIGIN; - entries[1] = kmsan_save_stack_with_flags(GFP_ATOMIC, 0); + entries[1] = kmsan_save_stack_with_flags(__GFP_HIGH, 0); entries[2] = id; /* * @entries is a local var in non-instrumented code, so KMSAN does not @@ -253,7 +253,7 @@ depot_stack_handle_t kmsan_internal_chain_origin(depot_stack_handle_t id) * positives when __stack_depot_save() passes it to instrumented code. */ kmsan_internal_unpoison_memory(entries, sizeof(entries), false); - handle = __stack_depot_save(entries, ARRAY_SIZE(entries), GFP_ATOMIC, + handle = __stack_depot_save(entries, ARRAY_SIZE(entries), __GFP_HIGH, true); return stack_depot_set_extra_bits(handle, extra_bits); } diff --git a/mm/kmsan/instrumentation.c b/mm/kmsan/instrumentation.c index cf12e9616b243..cc3907a9c33a0 100644 --- a/mm/kmsan/instrumentation.c +++ b/mm/kmsan/instrumentation.c @@ -282,7 +282,7 @@ void __msan_poison_alloca(void *address, uintptr_t size, char *descr) /* stack_depot_save() may allocate memory. */ kmsan_enter_runtime(); - handle = stack_depot_save(entries, ARRAY_SIZE(entries), GFP_ATOMIC); + handle = stack_depot_save(entries, ARRAY_SIZE(entries), __GFP_HIGH); kmsan_leave_runtime(); kmsan_internal_set_shadow_origin(address, size, -1, handle, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4b27e245a055f..c823c35c2ed46 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3208,12 +3208,12 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, * accumulating over a page of vmstat data or when pgdat or idx * changes. */ - if (stock->cached_objcg != objcg) { + if (READ_ONCE(stock->cached_objcg) != objcg) { old = drain_obj_stock(stock); obj_cgroup_get(objcg); stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes) ? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0; - stock->cached_objcg = objcg; + WRITE_ONCE(stock->cached_objcg, objcg); stock->cached_pgdat = pgdat; } else if (stock->cached_pgdat != pgdat) { /* Flush the existing cached vmstat data */ @@ -3267,7 +3267,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) local_lock_irqsave(&memcg_stock.stock_lock, flags); stock = this_cpu_ptr(&memcg_stock); - if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) { + if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) { stock->nr_bytes -= nr_bytes; ret = true; } @@ -3279,7 +3279,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock) { - struct obj_cgroup *old = stock->cached_objcg; + struct obj_cgroup *old = READ_ONCE(stock->cached_objcg); if (!old) return NULL; @@ -3332,7 +3332,7 @@ static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock) stock->cached_pgdat = NULL; } - stock->cached_objcg = NULL; + WRITE_ONCE(stock->cached_objcg, NULL); /* * The `old' objects needs to be released by the caller via * obj_cgroup_put() outside of memcg_stock_pcp::stock_lock. @@ -3343,10 +3343,11 @@ static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock) static bool obj_stock_flush_required(struct memcg_stock_pcp *stock, struct mem_cgroup *root_memcg) { + struct obj_cgroup *objcg = READ_ONCE(stock->cached_objcg); struct mem_cgroup *memcg; - if (stock->cached_objcg) { - memcg = obj_cgroup_memcg(stock->cached_objcg); + if (objcg) { + memcg = obj_cgroup_memcg(objcg); if (memcg && mem_cgroup_is_descendant(memcg, root_memcg)) return true; } @@ -3365,10 +3366,10 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, local_lock_irqsave(&memcg_stock.stock_lock, flags); stock = this_cpu_ptr(&memcg_stock); - if (stock->cached_objcg != objcg) { /* reset if necessary */ + if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */ old = drain_obj_stock(stock); obj_cgroup_get(objcg); - stock->cached_objcg = objcg; + WRITE_ONCE(stock->cached_objcg, objcg); stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes) ? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0; allow_uncharge = true; /* Allow uncharge when objcg changes */ diff --git a/mm/memory.c b/mm/memory.c index 07bab1e774994..402ee697698e6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5410,27 +5410,28 @@ retry: if (!vma_is_anonymous(vma)) goto inval; - /* find_mergeable_anon_vma uses adjacent vmas which are not locked */ - if (!vma->anon_vma) - goto inval; - if (!vma_start_read(vma)) goto inval; + /* + * find_mergeable_anon_vma uses adjacent vmas which are not locked. + * This check must happen after vma_start_read(); otherwise, a + * concurrent mremap() with MREMAP_DONTUNMAP could dissociate the VMA + * from its anon_vma. + */ + if (unlikely(!vma->anon_vma)) + goto inval_end_read; + /* * Due to the possibility of userfault handler dropping mmap_lock, avoid * it for now and fall back to page fault handling under mmap_lock. */ - if (userfaultfd_armed(vma)) { - vma_end_read(vma); - goto inval; - } + if (userfaultfd_armed(vma)) + goto inval_end_read; /* Check since vm_start/vm_end might change before we lock the VMA */ - if (unlikely(address < vma->vm_start || address >= vma->vm_end)) { - vma_end_read(vma); - goto inval; - } + if (unlikely(address < vma->vm_start || address >= vma->vm_end)) + goto inval_end_read; /* Check if the VMA got isolated after we found it */ if (vma->detached) { @@ -5442,6 +5443,9 @@ retry: rcu_read_unlock(); return vma; + +inval_end_read: + vma_end_read(vma); inval: rcu_read_unlock(); count_vm_vma_lock_event(VMA_LOCK_ABORT); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index eebe256104bc0..947ca580bb9a2 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -46,6 +46,7 @@ static const struct proto_ops l2cap_sock_ops; static void l2cap_sock_init(struct sock *sk, struct sock *parent); static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern); +static void l2cap_sock_cleanup_listen(struct sock *parent); bool l2cap_is_socket(struct socket *sock) { @@ -1415,6 +1416,7 @@ static int l2cap_sock_release(struct socket *sock) if (!sk) return 0; + l2cap_sock_cleanup_listen(sk); bt_sock_unlink(&l2cap_sk_list, sk); err = l2cap_sock_shutdown(sock, SHUT_RDWR); diff --git a/net/can/raw.c b/net/can/raw.c index f64469b98260f..f8e3866157a33 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -873,7 +873,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) skb->dev = dev; skb->priority = sk->sk_priority; - skb->mark = sk->sk_mark; + skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; skb_setup_tx_timestamp(skb, sockc.tsflags); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 11c04e7d928eb..658a6f2320cfa 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -3334,17 +3334,24 @@ static int linger_reg_commit_wait(struct ceph_osd_linger_request *lreq) int ret; dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id); - ret = wait_for_completion_interruptible(&lreq->reg_commit_wait); + ret = wait_for_completion_killable(&lreq->reg_commit_wait); return ret ?: lreq->reg_commit_error; } -static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq) +static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq, + unsigned long timeout) { - int ret; + long left; dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id); - ret = wait_for_completion_interruptible(&lreq->notify_finish_wait); - return ret ?: lreq->notify_finish_error; + left = wait_for_completion_killable_timeout(&lreq->notify_finish_wait, + ceph_timeout_jiffies(timeout)); + if (left <= 0) + left = left ?: -ETIMEDOUT; + else + left = lreq->notify_finish_error; /* completed */ + + return left; } /* @@ -4896,7 +4903,8 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, linger_submit(lreq); ret = linger_reg_commit_wait(lreq); if (!ret) - ret = linger_notify_finish_wait(lreq); + ret = linger_notify_finish_wait(lreq, + msecs_to_jiffies(2 * timeout * MSEC_PER_SEC)); else dout("lreq %p failed to initiate notify %d\n", lreq, ret); diff --git a/net/core/Makefile b/net/core/Makefile index 8f367813bc681..731db2eaa6107 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -13,7 +13,7 @@ obj-y += dev.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ fib_notifier.o xdp.o flow_offload.o gro.o \ - netdev-genl.o netdev-genl-gen.o + netdev-genl.o netdev-genl-gen.o gso.o obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index d4172534dfa8d..cca7594be92ec 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -496,8 +496,11 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs) return ERR_PTR(-EPERM); nla_for_each_nested(nla, nla_stgs, rem) { - if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) + if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) { + if (nla_len(nla) != sizeof(u32)) + return ERR_PTR(-EINVAL); nr_maps++; + } } diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL); diff --git a/net/core/dev.c b/net/core/dev.c index c29f3e1db3ca7..44a4eb76a659e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3209,7 +3209,7 @@ static u16 skb_tx_hash(const struct net_device *dev, return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset; } -static void skb_warn_bad_offload(const struct sk_buff *skb) +void skb_warn_bad_offload(const struct sk_buff *skb) { static const netdev_features_t null_features; struct net_device *dev = skb->dev; @@ -3338,74 +3338,6 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth) return vlan_get_protocol_and_depth(skb, type, depth); } -/* openvswitch calls this on rx path, so we need a different check. - */ -static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) -{ - if (tx_path) - return skb->ip_summed != CHECKSUM_PARTIAL && - skb->ip_summed != CHECKSUM_UNNECESSARY; - - return skb->ip_summed == CHECKSUM_NONE; -} - -/** - * __skb_gso_segment - Perform segmentation on skb. - * @skb: buffer to segment - * @features: features for the output path (see dev->features) - * @tx_path: whether it is called in TX path - * - * This function segments the given skb and returns a list of segments. - * - * It may return NULL if the skb requires no segmentation. This is - * only possible when GSO is used for verifying header integrity. - * - * Segmentation preserves SKB_GSO_CB_OFFSET bytes of previous skb cb. - */ -struct sk_buff *__skb_gso_segment(struct sk_buff *skb, - netdev_features_t features, bool tx_path) -{ - struct sk_buff *segs; - - if (unlikely(skb_needs_check(skb, tx_path))) { - int err; - - /* We're going to init ->check field in TCP or UDP header */ - err = skb_cow_head(skb, 0); - if (err < 0) - return ERR_PTR(err); - } - - /* Only report GSO partial support if it will enable us to - * support segmentation on this frame without needing additional - * work. - */ - if (features & NETIF_F_GSO_PARTIAL) { - netdev_features_t partial_features = NETIF_F_GSO_ROBUST; - struct net_device *dev = skb->dev; - - partial_features |= dev->features & dev->gso_partial_features; - if (!skb_gso_ok(skb, features | partial_features)) - features &= ~NETIF_F_GSO_PARTIAL; - } - - BUILD_BUG_ON(SKB_GSO_CB_OFFSET + - sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb)); - - SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); - SKB_GSO_CB(skb)->encap_level = 0; - - skb_reset_mac_header(skb); - skb_reset_mac_len(skb); - - segs = skb_mac_gso_segment(skb, features); - - if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) - skb_warn_bad_offload(skb); - - return segs; -} -EXPORT_SYMBOL(__skb_gso_segment); /* Take action when hardware reception checksum errors are detected. */ #ifdef CONFIG_BUG diff --git a/net/core/gro.c b/net/core/gro.c index 2d84165cb4f1d..2f1b6524bddc5 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -10,7 +10,7 @@ #define GRO_MAX_HEAD (MAX_HEADER + 128) static DEFINE_SPINLOCK(offload_lock); -static struct list_head offload_base __read_mostly = LIST_HEAD_INIT(offload_base); +struct list_head offload_base __read_mostly = LIST_HEAD_INIT(offload_base); /* Maximum number of GRO_NORMAL skbs to batch up for list-RX */ int gro_normal_batch __read_mostly = 8; @@ -92,63 +92,6 @@ void dev_remove_offload(struct packet_offload *po) } EXPORT_SYMBOL(dev_remove_offload); -/** - * skb_eth_gso_segment - segmentation handler for ethernet protocols. - * @skb: buffer to segment - * @features: features for the output path (see dev->features) - * @type: Ethernet Protocol ID - */ -struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb, - netdev_features_t features, __be16 type) -{ - struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); - struct packet_offload *ptype; - - rcu_read_lock(); - list_for_each_entry_rcu(ptype, &offload_base, list) { - if (ptype->type == type && ptype->callbacks.gso_segment) { - segs = ptype->callbacks.gso_segment(skb, features); - break; - } - } - rcu_read_unlock(); - - return segs; -} -EXPORT_SYMBOL(skb_eth_gso_segment); - -/** - * skb_mac_gso_segment - mac layer segmentation handler. - * @skb: buffer to segment - * @features: features for the output path (see dev->features) - */ -struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); - struct packet_offload *ptype; - int vlan_depth = skb->mac_len; - __be16 type = skb_network_protocol(skb, &vlan_depth); - - if (unlikely(!type)) - return ERR_PTR(-EINVAL); - - __skb_pull(skb, vlan_depth); - - rcu_read_lock(); - list_for_each_entry_rcu(ptype, &offload_base, list) { - if (ptype->type == type && ptype->callbacks.gso_segment) { - segs = ptype->callbacks.gso_segment(skb, features); - break; - } - } - rcu_read_unlock(); - - __skb_push(skb, skb->data - skb_mac_header(skb)); - - return segs; -} -EXPORT_SYMBOL(skb_mac_gso_segment); int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) { diff --git a/net/core/gso.c b/net/core/gso.c new file mode 100644 index 0000000000000..9e1803bfc9c6c --- /dev/null +++ b/net/core/gso.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include +#include + +/** + * skb_eth_gso_segment - segmentation handler for ethernet protocols. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + * @type: Ethernet Protocol ID + */ +struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb, + netdev_features_t features, __be16 type) +{ + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); + struct packet_offload *ptype; + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, &offload_base, list) { + if (ptype->type == type && ptype->callbacks.gso_segment) { + segs = ptype->callbacks.gso_segment(skb, features); + break; + } + } + rcu_read_unlock(); + + return segs; +} +EXPORT_SYMBOL(skb_eth_gso_segment); + +/** + * skb_mac_gso_segment - mac layer segmentation handler. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + */ +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); + struct packet_offload *ptype; + int vlan_depth = skb->mac_len; + __be16 type = skb_network_protocol(skb, &vlan_depth); + + if (unlikely(!type)) + return ERR_PTR(-EINVAL); + + __skb_pull(skb, vlan_depth); + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, &offload_base, list) { + if (ptype->type == type && ptype->callbacks.gso_segment) { + segs = ptype->callbacks.gso_segment(skb, features); + break; + } + } + rcu_read_unlock(); + + __skb_push(skb, skb->data - skb_mac_header(skb)); + + return segs; +} +EXPORT_SYMBOL(skb_mac_gso_segment); +/* openvswitch calls this on rx path, so we need a different check. + */ +static bool skb_needs_check(const struct sk_buff *skb, bool tx_path) +{ + if (tx_path) + return skb->ip_summed != CHECKSUM_PARTIAL && + skb->ip_summed != CHECKSUM_UNNECESSARY; + + return skb->ip_summed == CHECKSUM_NONE; +} + +/** + * __skb_gso_segment - Perform segmentation on skb. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + * @tx_path: whether it is called in TX path + * + * This function segments the given skb and returns a list of segments. + * + * It may return NULL if the skb requires no segmentation. This is + * only possible when GSO is used for verifying header integrity. + * + * Segmentation preserves SKB_GSO_CB_OFFSET bytes of previous skb cb. + */ +struct sk_buff *__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, bool tx_path) +{ + struct sk_buff *segs; + + if (unlikely(skb_needs_check(skb, tx_path))) { + int err; + + /* We're going to init ->check field in TCP or UDP header */ + err = skb_cow_head(skb, 0); + if (err < 0) + return ERR_PTR(err); + } + + /* Only report GSO partial support if it will enable us to + * support segmentation on this frame without needing additional + * work. + */ + if (features & NETIF_F_GSO_PARTIAL) { + netdev_features_t partial_features = NETIF_F_GSO_ROBUST; + struct net_device *dev = skb->dev; + + partial_features |= dev->features & dev->gso_partial_features; + if (!skb_gso_ok(skb, features | partial_features)) + features &= ~NETIF_F_GSO_PARTIAL; + } + + BUILD_BUG_ON(SKB_GSO_CB_OFFSET + + sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb)); + + SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); + SKB_GSO_CB(skb)->encap_level = 0; + + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + + segs = skb_mac_gso_segment(skb, features); + + if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) + skb_warn_bad_offload(skb); + + return segs; +} +EXPORT_SYMBOL(__skb_gso_segment); + +/** + * skb_gso_transport_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_transport_seglen is used to determine the real size of the + * individual segments, including Layer4 headers (TCP/UDP). + * + * The MAC/L2 or network (IP, IPv6) headers are not accounted for. + */ +static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) +{ + const struct skb_shared_info *shinfo = skb_shinfo(skb); + unsigned int thlen = 0; + + if (skb->encapsulation) { + thlen = skb_inner_transport_header(skb) - + skb_transport_header(skb); + + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + thlen += inner_tcp_hdrlen(skb); + } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { + thlen = tcp_hdrlen(skb); + } else if (unlikely(skb_is_gso_sctp(skb))) { + thlen = sizeof(struct sctphdr); + } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { + thlen = sizeof(struct udphdr); + } + /* UFO sets gso_size to the size of the fragmentation + * payload, i.e. the size of the L4 (UDP) header is already + * accounted for. + */ + return thlen + shinfo->gso_size; +} + +/** + * skb_gso_network_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_network_seglen is used to determine the real size of the + * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP). + * + * The MAC/L2 header is not accounted for. + */ +static unsigned int skb_gso_network_seglen(const struct sk_buff *skb) +{ + unsigned int hdr_len = skb_transport_header(skb) - + skb_network_header(skb); + + return hdr_len + skb_gso_transport_seglen(skb); +} + +/** + * skb_gso_mac_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_mac_seglen is used to determine the real size of the + * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4 + * headers (TCP/UDP). + */ +static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) +{ + unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + + return hdr_len + skb_gso_transport_seglen(skb); +} + +/** + * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS + * + * There are a couple of instances where we have a GSO skb, and we + * want to determine what size it would be after it is segmented. + * + * We might want to check: + * - L3+L4+payload size (e.g. IP forwarding) + * - L2+L3+L4+payload size (e.g. sanity check before passing to driver) + * + * This is a helper to do that correctly considering GSO_BY_FRAGS. + * + * @skb: GSO skb + * + * @seg_len: The segmented length (from skb_gso_*_seglen). In the + * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS]. + * + * @max_len: The maximum permissible length. + * + * Returns true if the segmented length <= max length. + */ +static inline bool skb_gso_size_check(const struct sk_buff *skb, + unsigned int seg_len, + unsigned int max_len) { + const struct skb_shared_info *shinfo = skb_shinfo(skb); + const struct sk_buff *iter; + + if (shinfo->gso_size != GSO_BY_FRAGS) + return seg_len <= max_len; + + /* Undo this so we can re-use header sizes */ + seg_len -= GSO_BY_FRAGS; + + skb_walk_frags(skb, iter) { + if (seg_len + skb_headlen(iter) > max_len) + return false; + } + + return true; +} + +/** + * skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU? + * + * @skb: GSO skb + * @mtu: MTU to validate against + * + * skb_gso_validate_network_len validates if a given skb will fit a + * wanted MTU once split. It considers L3 headers, L4 headers, and the + * payload. + */ +bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu) +{ + return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu); +} +EXPORT_SYMBOL_GPL(skb_gso_validate_network_len); + +/** + * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length? + * + * @skb: GSO skb + * @len: length to validate against + * + * skb_gso_validate_mac_len validates if a given skb will fit a wanted + * length once split, including L2, L3 and L4 headers and the payload. + */ +bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len) +{ + return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len); +} +EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); + diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2fe6a3379aaed..aa1743b2b770b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -5139,13 +5139,17 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (br_spec) { nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) { if (nla_len(attr) < sizeof(flags)) return -EINVAL; have_flags = true; flags = nla_get_u16(attr); - break; + } + + if (nla_type(attr) == IFLA_BRIDGE_MODE) { + if (nla_len(attr) < sizeof(u16)) + return -EINVAL; } } } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1b6a1d99869dc..593ec18e3f007 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -67,6 +67,7 @@ #include #include #include +#include #include #include #include @@ -5789,147 +5790,6 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) } EXPORT_SYMBOL_GPL(skb_scrub_packet); -/** - * skb_gso_transport_seglen - Return length of individual segments of a gso packet - * - * @skb: GSO skb - * - * skb_gso_transport_seglen is used to determine the real size of the - * individual segments, including Layer4 headers (TCP/UDP). - * - * The MAC/L2 or network (IP, IPv6) headers are not accounted for. - */ -static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) -{ - const struct skb_shared_info *shinfo = skb_shinfo(skb); - unsigned int thlen = 0; - - if (skb->encapsulation) { - thlen = skb_inner_transport_header(skb) - - skb_transport_header(skb); - - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - thlen += inner_tcp_hdrlen(skb); - } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { - thlen = tcp_hdrlen(skb); - } else if (unlikely(skb_is_gso_sctp(skb))) { - thlen = sizeof(struct sctphdr); - } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { - thlen = sizeof(struct udphdr); - } - /* UFO sets gso_size to the size of the fragmentation - * payload, i.e. the size of the L4 (UDP) header is already - * accounted for. - */ - return thlen + shinfo->gso_size; -} - -/** - * skb_gso_network_seglen - Return length of individual segments of a gso packet - * - * @skb: GSO skb - * - * skb_gso_network_seglen is used to determine the real size of the - * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP). - * - * The MAC/L2 header is not accounted for. - */ -static unsigned int skb_gso_network_seglen(const struct sk_buff *skb) -{ - unsigned int hdr_len = skb_transport_header(skb) - - skb_network_header(skb); - - return hdr_len + skb_gso_transport_seglen(skb); -} - -/** - * skb_gso_mac_seglen - Return length of individual segments of a gso packet - * - * @skb: GSO skb - * - * skb_gso_mac_seglen is used to determine the real size of the - * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4 - * headers (TCP/UDP). - */ -static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) -{ - unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); - - return hdr_len + skb_gso_transport_seglen(skb); -} - -/** - * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS - * - * There are a couple of instances where we have a GSO skb, and we - * want to determine what size it would be after it is segmented. - * - * We might want to check: - * - L3+L4+payload size (e.g. IP forwarding) - * - L2+L3+L4+payload size (e.g. sanity check before passing to driver) - * - * This is a helper to do that correctly considering GSO_BY_FRAGS. - * - * @skb: GSO skb - * - * @seg_len: The segmented length (from skb_gso_*_seglen). In the - * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS]. - * - * @max_len: The maximum permissible length. - * - * Returns true if the segmented length <= max length. - */ -static inline bool skb_gso_size_check(const struct sk_buff *skb, - unsigned int seg_len, - unsigned int max_len) { - const struct skb_shared_info *shinfo = skb_shinfo(skb); - const struct sk_buff *iter; - - if (shinfo->gso_size != GSO_BY_FRAGS) - return seg_len <= max_len; - - /* Undo this so we can re-use header sizes */ - seg_len -= GSO_BY_FRAGS; - - skb_walk_frags(skb, iter) { - if (seg_len + skb_headlen(iter) > max_len) - return false; - } - - return true; -} - -/** - * skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU? - * - * @skb: GSO skb - * @mtu: MTU to validate against - * - * skb_gso_validate_network_len validates if a given skb will fit a - * wanted MTU once split. It considers L3 headers, L4 headers, and the - * payload. - */ -bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu) -{ - return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu); -} -EXPORT_SYMBOL_GPL(skb_gso_validate_network_len); - -/** - * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length? - * - * @skb: GSO skb - * @len: length to validate against - * - * skb_gso_validate_mac_len validates if a given skb will fit a wanted - * length once split, including L2, L3 and L4 headers and the payload. - */ -bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len) -{ - return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len); -} -EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); - static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) { int mac_len, meta_len; diff --git a/net/core/sock.c b/net/core/sock.c index 4a0edccf86066..1f31a97100d4f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -800,7 +800,7 @@ EXPORT_SYMBOL(sock_no_linger); void sock_set_priority(struct sock *sk, u32 priority) { lock_sock(sk); - sk->sk_priority = priority; + WRITE_ONCE(sk->sk_priority, priority); release_sock(sk); } EXPORT_SYMBOL(sock_set_priority); @@ -984,7 +984,7 @@ EXPORT_SYMBOL(sock_set_rcvbuf); static void __sock_set_mark(struct sock *sk, u32 val) { if (val != sk->sk_mark) { - sk->sk_mark = val; + WRITE_ONCE(sk->sk_mark, val); sk_dst_reset(sk); } } @@ -1003,7 +1003,7 @@ static void sock_release_reserved_memory(struct sock *sk, int bytes) bytes = round_down(bytes, PAGE_SIZE); WARN_ON(bytes > sk->sk_reserved_mem); - sk->sk_reserved_mem -= bytes; + WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem - bytes); sk_mem_reclaim(sk); } @@ -1040,7 +1040,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes) } sk->sk_forward_alloc += pages << PAGE_SHIFT; - sk->sk_reserved_mem += pages << PAGE_SHIFT; + WRITE_ONCE(sk->sk_reserved_mem, + sk->sk_reserved_mem + (pages << PAGE_SHIFT)); return 0; } @@ -1209,7 +1210,7 @@ set_sndbuf: if ((val >= 0 && val <= 6) || sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) - sk->sk_priority = val; + WRITE_ONCE(sk->sk_priority, val); else ret = -EPERM; break; @@ -1427,7 +1428,8 @@ set_sndbuf: cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); - sk->sk_max_pacing_rate = ulval; + /* Pairs with READ_ONCE() from sk_getsockopt() */ + WRITE_ONCE(sk->sk_max_pacing_rate, ulval); sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval); break; } @@ -1522,7 +1524,9 @@ set_sndbuf: } if ((u8)val == SOCK_TXREHASH_DEFAULT) val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash); - /* Paired with READ_ONCE() in tcp_rtx_synack() */ + /* Paired with READ_ONCE() in tcp_rtx_synack() + * and sk_getsockopt(). + */ WRITE_ONCE(sk->sk_txrehash, (u8)val); break; @@ -1622,11 +1626,11 @@ int sk_getsockopt(struct sock *sk, int level, int optname, break; case SO_SNDBUF: - v.val = sk->sk_sndbuf; + v.val = READ_ONCE(sk->sk_sndbuf); break; case SO_RCVBUF: - v.val = sk->sk_rcvbuf; + v.val = READ_ONCE(sk->sk_rcvbuf); break; case SO_REUSEADDR: @@ -1668,7 +1672,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname, break; case SO_PRIORITY: - v.val = sk->sk_priority; + v.val = READ_ONCE(sk->sk_priority); break; case SO_LINGER: @@ -1715,7 +1719,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname, break; case SO_RCVLOWAT: - v.val = sk->sk_rcvlowat; + v.val = READ_ONCE(sk->sk_rcvlowat); break; case SO_SNDLOWAT: @@ -1795,7 +1799,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname, optval, optlen, len); case SO_MARK: - v.val = sk->sk_mark; + v.val = READ_ONCE(sk->sk_mark); break; case SO_RCVMARK: @@ -1814,7 +1818,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname, if (!sock->ops->set_peek_off) return -EOPNOTSUPP; - v.val = sk->sk_peek_off; + v.val = READ_ONCE(sk->sk_peek_off); break; case SO_NOFCS: v.val = sock_flag(sk, SOCK_NOFCS); @@ -1844,7 +1848,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_NET_RX_BUSY_POLL case SO_BUSY_POLL: - v.val = sk->sk_ll_usec; + v.val = READ_ONCE(sk->sk_ll_usec); break; case SO_PREFER_BUSY_POLL: v.val = READ_ONCE(sk->sk_prefer_busy_poll); @@ -1852,12 +1856,14 @@ int sk_getsockopt(struct sock *sk, int level, int optname, #endif case SO_MAX_PACING_RATE: + /* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */ if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) { lv = sizeof(v.ulval); - v.ulval = sk->sk_max_pacing_rate; + v.ulval = READ_ONCE(sk->sk_max_pacing_rate); } else { /* 32bit version */ - v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U); + v.val = min_t(unsigned long, ~0U, + READ_ONCE(sk->sk_max_pacing_rate)); } break; @@ -1925,11 +1931,12 @@ int sk_getsockopt(struct sock *sk, int level, int optname, break; case SO_RESERVE_MEM: - v.val = sk->sk_reserved_mem; + v.val = READ_ONCE(sk->sk_reserved_mem); break; case SO_TXREHASH: - v.val = sk->sk_txrehash; + /* Paired with WRITE_ONCE() in sk_setsockopt() */ + v.val = READ_ONCE(sk->sk_txrehash); break; default: @@ -3120,7 +3127,7 @@ EXPORT_SYMBOL(__sk_mem_reclaim); int sk_set_peek_off(struct sock *sk, int val) { - sk->sk_peek_off = val; + WRITE_ONCE(sk->sk_peek_off, val); return 0; } EXPORT_SYMBOL_GPL(sk_set_peek_off); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 00afb66cd0950..08ab108206bf8 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -32,8 +32,6 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) { struct bpf_stab *stab; - if (!capable(CAP_NET_ADMIN)) - return ERR_PTR(-EPERM); if (attr->max_entries == 0 || attr->key_size != 4 || (attr->value_size != sizeof(u32) && @@ -117,7 +115,6 @@ static void sock_map_sk_acquire(struct sock *sk) __acquires(&sk->sk_lock.slock) { lock_sock(sk); - preempt_disable(); rcu_read_lock(); } @@ -125,7 +122,6 @@ static void sock_map_sk_release(struct sock *sk) __releases(&sk->sk_lock.slock) { rcu_read_unlock(); - preempt_enable(); release_sock(sk); } @@ -1085,8 +1081,6 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) struct bpf_shtab *htab; int i, err; - if (!capable(CAP_NET_ADMIN)) - return ERR_PTR(-EPERM); if (attr->max_entries == 0 || attr->key_size == 0 || (attr->value_size != sizeof(u32) && diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index c0c4381285759..2e6b8c8fd2ded 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -980,7 +980,7 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, return -EOPNOTSUPP; ret = nla_parse_nested_deprecated(data, DCB_BCN_ATTR_MAX, - tb[DCB_ATTR_BCN], dcbnl_pfc_up_nest, + tb[DCB_ATTR_BCN], dcbnl_bcn_nest, NULL); if (ret) return ret; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 93c98990d7263..94b69a50c8b50 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -238,8 +238,8 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); - err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass, - sk->sk_priority); + err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt, + np->tclass, sk->sk_priority); rcu_read_unlock(); err = net_xmit_eval(err); } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4a76ebf793b85..10ebe39dcc873 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -100,6 +100,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index ee848be59e65a..10e96ed6c9e39 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 2b9cb5398335b..311e70bfce407 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -11,6 +11,7 @@ #include #include #include +#include static struct sk_buff *gre_gso_segment(struct sk_buff *skb, netdev_features_t features) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index b812eb36f0e36..f7426926a1041 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -150,7 +150,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, } #endif - if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark)) + if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, READ_ONCE(sk->sk_mark))) goto errout; if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || @@ -799,7 +799,7 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) entry.ifindex = sk->sk_bound_dev_if; entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0; if (sk_fullsock(sk)) - entry.mark = sk->sk_mark; + entry.mark = READ_ONCE(sk->sk_mark); else if (sk->sk_state == TCP_NEW_SYN_RECV) entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark; else if (sk->sk_state == TCP_TIME_WAIT) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a1bead441026e..6f6f63cf9224f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include @@ -183,9 +184,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, ip_options_build(skb, &opt->opt, daddr, rt); } - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); if (!skb->mark) - skb->mark = sk->sk_mark; + skb->mark = READ_ONCE(sk->sk_mark); /* Send it out. */ return ip_local_out(net, skb->sk, skb); @@ -527,8 +528,8 @@ packet_routed: skb_shinfo(skb)->gso_segs ?: 1); /* TODO : should we use skb->sk here instead of sk ? */ - skb->priority = sk->sk_priority; - skb->mark = sk->sk_mark; + skb->priority = READ_ONCE(sk->sk_priority); + skb->mark = READ_ONCE(sk->sk_mark); res = ip_local_out(net, sk, skb); rcu_read_unlock(); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8e97d8d4cc9d9..d41bce8927b2c 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -592,7 +592,7 @@ void __ip_sock_set_tos(struct sock *sk, int val) } if (inet_sk(sk)->tos != val) { inet_sk(sk)->tos = val; - sk->sk_priority = rt_tos2priority(val); + WRITE_ONCE(sk->sk_priority, rt_tos2priority(val)); sk_dst_reset(sk); } } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index eadf1c9ef7e49..fb31624019435 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -348,7 +348,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, goto error; skb_reserve(skb, hlen); - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; skb_dst_set(skb, &rt->dst); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 98d7e6ba7493b..92fede388d520 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -518,7 +518,7 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4, const struct inet_sock *inet = inet_sk(sk); oif = sk->sk_bound_dev_if; - mark = sk->sk_mark; + mark = READ_ONCE(sk->sk_mark); tos = ip_sock_rt_tos(sk); scope = ip_sock_rt_scope(sk); prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol; @@ -552,7 +552,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) inet_opt = rcu_dereference(inet->inet_opt); if (inet_opt && inet_opt->opt.srr) daddr = inet_opt->opt.faddr; - flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk) & IPTOS_RT_MASK, ip_sock_rt_scope(sk), inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f37d13ee7b4cc..498dd4acdeec8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -931,9 +931,9 @@ static void tcp_v4_send_ack(const struct sock *sk, ctl_sk = this_cpu_read(ipv4_tcp_sk); sock_net_set(ctl_sk, net); ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? - inet_twsk(sk)->tw_mark : sk->sk_mark; + inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark); ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? - inet_twsk(sk)->tw_priority : sk->sk_priority; + inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority); transmit_time = tcp_transmit_time(sk); ip_send_unicast_reply(ctl_sk, skb, &TCP_SKB_CB(skb)->header.h4.opt, diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 82f4575f9cd90..99ac5efe244d3 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -40,7 +40,7 @@ struct tcp_fastopen_metrics { struct tcp_metrics_block { struct tcp_metrics_block __rcu *tcpm_next; - possible_net_t tcpm_net; + struct net *tcpm_net; struct inetpeer_addr tcpm_saddr; struct inetpeer_addr tcpm_daddr; unsigned long tcpm_stamp; @@ -51,34 +51,38 @@ struct tcp_metrics_block { struct rcu_head rcu_head; }; -static inline struct net *tm_net(struct tcp_metrics_block *tm) +static inline struct net *tm_net(const struct tcp_metrics_block *tm) { - return read_pnet(&tm->tcpm_net); + /* Paired with the WRITE_ONCE() in tcpm_new() */ + return READ_ONCE(tm->tcpm_net); } static bool tcp_metric_locked(struct tcp_metrics_block *tm, enum tcp_metric_index idx) { - return tm->tcpm_lock & (1 << idx); + /* Paired with WRITE_ONCE() in tcpm_suck_dst() */ + return READ_ONCE(tm->tcpm_lock) & (1 << idx); } -static u32 tcp_metric_get(struct tcp_metrics_block *tm, +static u32 tcp_metric_get(const struct tcp_metrics_block *tm, enum tcp_metric_index idx) { - return tm->tcpm_vals[idx]; + /* Paired with WRITE_ONCE() in tcp_metric_set() */ + return READ_ONCE(tm->tcpm_vals[idx]); } static void tcp_metric_set(struct tcp_metrics_block *tm, enum tcp_metric_index idx, u32 val) { - tm->tcpm_vals[idx] = val; + /* Paired with READ_ONCE() in tcp_metric_get() */ + WRITE_ONCE(tm->tcpm_vals[idx], val); } static bool addr_same(const struct inetpeer_addr *a, const struct inetpeer_addr *b) { - return inetpeer_addr_cmp(a, b) == 0; + return (a->family == b->family) && !inetpeer_addr_cmp(a, b); } struct tcpm_hash_bucket { @@ -89,6 +93,7 @@ static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly; static unsigned int tcp_metrics_hash_log __read_mostly; static DEFINE_SPINLOCK(tcp_metrics_lock); +static DEFINE_SEQLOCK(fastopen_seqlock); static void tcpm_suck_dst(struct tcp_metrics_block *tm, const struct dst_entry *dst, @@ -97,7 +102,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, u32 msval; u32 val; - tm->tcpm_stamp = jiffies; + WRITE_ONCE(tm->tcpm_stamp, jiffies); val = 0; if (dst_metric_locked(dst, RTAX_RTT)) @@ -110,30 +115,42 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, val |= 1 << TCP_METRIC_CWND; if (dst_metric_locked(dst, RTAX_REORDERING)) val |= 1 << TCP_METRIC_REORDERING; - tm->tcpm_lock = val; + /* Paired with READ_ONCE() in tcp_metric_locked() */ + WRITE_ONCE(tm->tcpm_lock, val); msval = dst_metric_raw(dst, RTAX_RTT); - tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC; + tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC); msval = dst_metric_raw(dst, RTAX_RTTVAR); - tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC; - tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); - tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); - tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); + tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC); + tcp_metric_set(tm, TCP_METRIC_SSTHRESH, + dst_metric_raw(dst, RTAX_SSTHRESH)); + tcp_metric_set(tm, TCP_METRIC_CWND, + dst_metric_raw(dst, RTAX_CWND)); + tcp_metric_set(tm, TCP_METRIC_REORDERING, + dst_metric_raw(dst, RTAX_REORDERING)); if (fastopen_clear) { + write_seqlock(&fastopen_seqlock); tm->tcpm_fastopen.mss = 0; tm->tcpm_fastopen.syn_loss = 0; tm->tcpm_fastopen.try_exp = 0; tm->tcpm_fastopen.cookie.exp = false; tm->tcpm_fastopen.cookie.len = 0; + write_sequnlock(&fastopen_seqlock); } } #define TCP_METRICS_TIMEOUT (60 * 60 * HZ) -static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) +static void tcpm_check_stamp(struct tcp_metrics_block *tm, + const struct dst_entry *dst) { - if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) + unsigned long limit; + + if (!tm) + return; + limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT; + if (unlikely(time_after(jiffies, limit))) tcpm_suck_dst(tm, dst, false); } @@ -174,20 +191,23 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, oldest = deref_locked(tcp_metrics_hash[hash].chain); for (tm = deref_locked(oldest->tcpm_next); tm; tm = deref_locked(tm->tcpm_next)) { - if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp)) + if (time_before(READ_ONCE(tm->tcpm_stamp), + READ_ONCE(oldest->tcpm_stamp))) oldest = tm; } tm = oldest; } else { - tm = kmalloc(sizeof(*tm), GFP_ATOMIC); + tm = kzalloc(sizeof(*tm), GFP_ATOMIC); if (!tm) goto out_unlock; } - write_pnet(&tm->tcpm_net, net); + /* Paired with the READ_ONCE() in tm_net() */ + WRITE_ONCE(tm->tcpm_net, net); + tm->tcpm_saddr = *saddr; tm->tcpm_daddr = *daddr; - tcpm_suck_dst(tm, dst, true); + tcpm_suck_dst(tm, dst, reclaim); if (likely(!reclaim)) { tm->tcpm_next = tcp_metrics_hash[hash].chain; @@ -434,7 +454,7 @@ void tcp_update_metrics(struct sock *sk) tp->reordering); } } - tm->tcpm_stamp = jiffies; + WRITE_ONCE(tm->tcpm_stamp, jiffies); out_unlock: rcu_read_unlock(); } @@ -539,8 +559,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) return ret; } -static DEFINE_SEQLOCK(fastopen_seqlock); - void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie) { @@ -647,7 +665,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, } if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE, - jiffies - tm->tcpm_stamp, + jiffies - READ_ONCE(tm->tcpm_stamp), TCP_METRICS_ATTR_PAD) < 0) goto nla_put_failure; @@ -658,7 +676,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, if (!nest) goto nla_put_failure; for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) { - u32 val = tm->tcpm_vals[i]; + u32 val = tcp_metric_get(tm, i); if (!val) continue; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 4851211aa60d6..9c51ee9ccd4c0 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9482def1f3103..6d327d6d978c5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -103,6 +103,7 @@ #include #include #include +#include #include #include #include @@ -113,6 +114,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_IPV6) #include #endif @@ -554,10 +556,13 @@ struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, { const struct iphdr *iph = ip_hdr(skb); struct net *net = dev_net(skb->dev); + int iif, sdif; + + inet_get_iif_sdif(skb, &iif, &sdif); return __udp4_lib_lookup(net, iph->saddr, sport, - iph->daddr, dport, inet_iif(skb), - inet_sdif(skb), net->ipv4.udp_table, NULL); + iph->daddr, dport, iif, + sdif, net->ipv4.udp_table, NULL); } /* Must be called under rcu_read_lock(). diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 4a61832e7f69b..0f46b3c2e4ac5 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -608,10 +609,13 @@ static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport, { const struct iphdr *iph = skb_gro_network_header(skb); struct net *net = dev_net(skb->dev); + int iif, sdif; + + inet_get_iif_sdif(skb, &iif, &sdif); return __udp4_lib_lookup(net, iph->saddr, sport, - iph->daddr, dport, inet_iif(skb), - inet_sdif(skb), net->ipv4.udp_table, NULL); + iph->daddr, dport, iif, + sdif, net->ipv4.udp_table, NULL); } INDIRECT_CALLABLE_SCOPE diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 7723402689973..a189e08370a5e 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 00dc2e3b01845..d6314287338da 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "ip6_offload.h" diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 9554cf46ed888..4a27fab1d09a3 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -42,6 +42,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 51cf37abd142d..b4152b5d68ffb 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1073,7 +1073,7 @@ static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, And all this only to mangle msg->im6_msgtype and to set msg->im6_mbz to "mbz" :-) */ - skb_push(skb, -skb_network_offset(pkt)); + __skb_pull(skb, skb_network_offset(pkt)); skb_push(skb, sizeof(*msg)); skb_reset_transport_header(skb); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index f804c11e2146c..c2c291827a2ce 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -120,7 +120,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipcm6_init_sk(&ipc6, np); ipc6.sockc.tsflags = sk->sk_tsflags; - ipc6.sockc.mark = sk->sk_mark; + ipc6.sockc.mark = READ_ONCE(sk->sk_mark); fl6.flowi6_oif = oif; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 44ee7a2e72ac2..d85d2082aeb77 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -614,7 +614,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb_reserve(skb, hlen); skb->protocol = htons(ETH_P_IPV6); - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; @@ -774,12 +774,12 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) */ memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = READ_ONCE(sk->sk_mark); fl6.flowi6_uid = sk->sk_uid; ipcm6_init(&ipc6); ipc6.sockc.tsflags = sk->sk_tsflags; - ipc6.sockc.mark = sk->sk_mark; + ipc6.sockc.mark = fl6.flowi6_mark; if (sin6) { if (addr_len < SIN6_LEN_RFC2133) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 392aaa373b667..d5c6be77ec1ea 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2951,7 +2951,8 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) if (!oif && skb->dev) oif = l3mdev_master_ifindex(skb->dev); - ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid); + ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark), + sk->sk_uid); dst = __sk_dst_get(sk); if (!dst || !dst->obsolete || @@ -3172,8 +3173,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif) void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) { - ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark, - sk->sk_uid); + ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, + READ_ONCE(sk->sk_mark), sk->sk_uid); } EXPORT_SYMBOL_GPL(ip6_sk_redirect); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f7c248a7f8d1d..3155692a0e06b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -568,8 +568,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); - err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt, - tclass, sk->sk_priority); + err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark), + opt, tclass, sk->sk_priority); rcu_read_unlock(); err = net_xmit_eval(err); } @@ -943,7 +943,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 if (sk->sk_state == TCP_TIME_WAIT) mark = inet_twsk(sk)->tw_mark; else - mark = sk->sk_mark; + mark = READ_ONCE(sk->sk_mark); skb_set_delivery_time(buff, tcp_transmit_time(sk), true); } if (txhash) { @@ -1132,7 +1132,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, READ_ONCE(req->ts_recent), sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), - ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority, + ipv6_get_dsfield(ipv6_hdr(skb)), 0, + READ_ONCE(sk->sk_priority), READ_ONCE(tcp_rsk(req)->txhash)); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d594a0425749b..8521729fb2375 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -300,10 +301,13 @@ struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, { const struct ipv6hdr *iph = ipv6_hdr(skb); struct net *net = dev_net(skb->dev); + int iif, sdif; + + inet6_get_iif_sdif(skb, &iif, &sdif); return __udp6_lib_lookup(net, &iph->saddr, sport, - &iph->daddr, dport, inet6_iif(skb), - inet6_sdif(skb), net->ipv4.udp_table, NULL); + &iph->daddr, dport, iif, + sdif, net->ipv4.udp_table, NULL); } /* Must be called under rcu_read_lock(). @@ -624,7 +628,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == NDISC_REDIRECT) { if (tunnel) { ip6_redirect(skb, sock_net(sk), inet6_iif(skb), - sk->sk_mark, sk->sk_uid); + READ_ONCE(sk->sk_mark), sk->sk_uid); } else { ip6_sk_redirect(skb, sk); } @@ -1356,7 +1360,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipcm6_init(&ipc6); ipc6.gso_size = READ_ONCE(up->gso_size); ipc6.sockc.tsflags = sk->sk_tsflags; - ipc6.sockc.mark = sk->sk_mark; + ipc6.sockc.mark = READ_ONCE(sk->sk_mark); /* destination address check */ if (sin6) { diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index e0e10f6bcdc18..6b95ba241ebe2 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -14,6 +14,7 @@ #include #include "ip6_offload.h" #include +#include static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, netdev_features_t features) @@ -117,10 +118,13 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport, { const struct ipv6hdr *iph = skb_gro_network_header(skb); struct net *net = dev_net(skb->dev); + int iif, sdif; + + inet6_get_iif_sdif(skb, &iif, &sdif); return __udp6_lib_lookup(net, &iph->saddr, sport, - &iph->daddr, dport, inet6_iif(skb), - inet6_sdif(skb), net->ipv4.udp_table, NULL); + &iph->daddr, dport, iif, + sdif, net->ipv4.udp_table, NULL); } INDIRECT_CALLABLE_SCOPE diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 5137ea1861ce2..bce4132b0a5c8 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -519,7 +519,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) /* Get and verify the address */ memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = READ_ONCE(sk->sk_mark); fl6.flowi6_uid = sk->sk_uid; ipcm6_init(&ipc6); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 13b522dab0a3d..39ca4a8fe7b32 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "ieee80211_i.h" #include "driver-ops.h" diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index dc5165d3eec4e..bf6e81d562631 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 1482259de9b5d..533d082f0701e 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -14,6 +14,7 @@ #include #include #include +#include #include static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index d4258869ac48f..64fcfc3d5270f 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -102,7 +102,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in break; case SO_MARK: if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { - ssk->sk_mark = sk->sk_mark; + WRITE_ONCE(ssk->sk_mark, sk->sk_mark); sk_dst_reset(ssk); } break; diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 3bbaf9c7ea46a..7eba00f6c6b6a 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index e311462f6d98d..556bc902af00f 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 85f8df87efdaa..1dd336a3ce786 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -107,7 +107,7 @@ static void nft_socket_eval(const struct nft_expr *expr, break; case NFT_SOCKET_MARK: if (sk_fullsock(sk)) { - *dest = sk->sk_mark; + *dest = READ_ONCE(sk->sk_mark); } else { regs->verdict.code = NFT_BREAK; return; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 7013f55f05d1e..76e01f292aaff 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -77,7 +77,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && transparent && sk_fullsock(sk)) - pskb->mark = sk->sk_mark; + pskb->mark = READ_ONCE(sk->sk_mark); if (sk != skb->sk) sock_gen_put(sk); @@ -138,7 +138,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && transparent && sk_fullsock(sk)) - pskb->mark = sk->sk_mark; + pskb->mark = READ_ONCE(sk->sk_mark); if (sk != skb->sk) sock_gen_put(sk); diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c index 0f23e5e8e03eb..f4a38bd6a7e04 100644 --- a/net/nsh/nsh.c +++ b/net/nsh/nsh.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index a8cf9a88758ef..8074ea00d577e 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 58f530f60172a..a6d2a0b1aa21e 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a2dbeb264f260..a753246ef1657 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2050,8 +2050,8 @@ retry: skb->protocol = proto; skb->dev = dev; - skb->priority = sk->sk_priority; - skb->mark = sk->sk_mark; + skb->priority = READ_ONCE(sk->sk_priority); + skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; skb_setup_tx_timestamp(skb, sockc.tsflags); @@ -2585,8 +2585,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->protocol = proto; skb->dev = dev; - skb->priority = po->sk.sk_priority; - skb->mark = po->sk.sk_mark; + skb->priority = READ_ONCE(po->sk.sk_priority); + skb->mark = READ_ONCE(po->sk.sk_mark); skb->tstamp = sockc->transmit_time; skb_setup_tx_timestamp(skb, sockc->tsflags); skb_zcopy_set_nouarg(skb, ph.raw); @@ -2988,7 +2988,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) goto out_unlock; sockcm_init(&sockc, sk); - sockc.mark = sk->sk_mark; + sockc.mark = READ_ONCE(sk->sk_mark); if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); if (unlikely(err)) @@ -3061,7 +3061,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->protocol = proto; skb->dev = dev; - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc.mark; skb->tstamp = sockc.transmit_time; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 2e9dce03d1ecc..f3121c5a85e9f 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 8641f80593179..c49d6af0e0480 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -267,7 +267,6 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, return -ENOBUFS; fnew->id = f->id; - fnew->res = f->res; fnew->ifindex = f->ifindex; fnew->tp = f->tp; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index d0c53724d3e86..1e20bbd687f1d 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -513,7 +513,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (fold) { f->id = fold->id; f->iif = fold->iif; - f->res = fold->res; f->handle = fold->handle; f->tp = fold->tp; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 5abf31e432caf..da4c179a4d418 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -826,7 +826,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, new->ifindex = n->ifindex; new->fshift = n->fshift; - new->res = n->res; new->flags = n->flags; RCU_INIT_POINTER(new->ht_down, ht); @@ -1024,18 +1023,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } + /* At this point, we need to derive the new handle that will be used to + * uniquely map the identity of this table match entry. The + * identity of the entry that we need to construct is 32 bits made of: + * htid(12b):bucketid(8b):node/entryid(12b) + * + * At this point _we have the table(ht)_ in which we will insert this + * entry. We carry the table's id in variable "htid". + * Note that earlier code picked the ht selection either by a) the user + * providing the htid specified via TCA_U32_HASH attribute or b) when + * no such attribute is passed then the root ht, is default to at ID + * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0. + * If OTOH the user passed us the htid, they may also pass a bucketid of + * choice. 0 is fine. For example a user htid is 0x[600][01][000] it is + * indicating hash bucketid of 1. Rule: the entry/node ID _cannot_ be + * passed via the htid, so even if it was non-zero it will be ignored. + * + * We may also have a handle, if the user passed one. The handle also + * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b). + * Rule: the bucketid on the handle is ignored even if one was passed; + * rather the value on "htid" is always assumed to be the bucketid. + */ if (handle) { + /* Rule: The htid from handle and tableid from htid must match */ if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) { NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch"); return -EINVAL; } - handle = htid | TC_U32_NODE(handle); - err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle, - GFP_KERNEL); - if (err) - return err; - } else + /* Ok, so far we have a valid htid(12b):bucketid(8b) but we + * need to finalize the table entry identification with the last + * part - the node/entryid(12b)). Rule: Nodeid _cannot be 0_ for + * entries. Rule: nodeid of 0 is reserved only for tables(see + * earlier code which processes TC_U32_DIVISOR attribute). + * Rule: The nodeid can only be derived from the handle (and not + * htid). + * Rule: if the handle specified zero for the node id example + * 0x60000000, then pick a new nodeid from the pool of IDs + * this hash table has been allocating from. + * If OTOH it is specified (i.e for example the user passed a + * handle such as 0x60000123), then we use it generate our final + * handle which is used to uniquely identify the match entry. + */ + if (!TC_U32_NODE(handle)) { + handle = gen_new_kid(ht, htid); + } else { + handle = htid | TC_U32_NODE(handle); + err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, + handle, GFP_KERNEL); + if (err) + return err; + } + } else { + /* The user did not give us a handle; lets just generate one + * from the table's pool of nodeids. + */ handle = gen_new_kid(ht, htid); + } if (tb[TCA_U32_SEL] == NULL) { NL_SET_ERR_MSG_MOD(extack, "Selector not specified"); diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 891e007d5c0bf..9cff99558694d 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index b93ec2a3454eb..38d9aa0cd30e7 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 4caf80ddc6721..97afa244e54f5 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -1012,6 +1013,11 @@ static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { TC_FP_PREEMPTIBLE), }; +static struct netlink_range_validation_signed taprio_cycle_time_range = { + .min = 0, + .max = INT_MAX, +}; + static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_PRIOMAP] = { .len = sizeof(struct tc_mqprio_qopt) @@ -1020,7 +1026,8 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 }, [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED }, [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 }, - [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 }, + [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = + NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range), [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 }, [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 }, [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 }, @@ -1156,6 +1163,11 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb, return -EINVAL; } + if (cycle < 0 || cycle > INT_MAX) { + NL_SET_ERR_MSG(extack, "'cycle_time' is too big"); + return -EINVAL; + } + new->cycle_time = cycle; } @@ -1344,7 +1356,7 @@ static void setup_txtime(struct taprio_sched *q, struct sched_gate_list *sched, ktime_t base) { struct sched_entry *entry; - u32 interval = 0; + u64 interval = 0; list_for_each_entry(entry, &sched->entries, list) { entry->next_txtime = ktime_add_ns(base, interval); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 277ad11f4d613..17d2d00ddb182 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sctp/offload.c b/net/sctp/offload.c index eb874e3c399a5..502095173d885 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -22,6 +22,7 @@ #include #include #include +#include static __le32 sctp_gso_make_checksum(struct sk_buff *skb) { diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 538e9c6ec8c98..fa6b54c1411cb 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -445,7 +445,7 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, nsk->sk_rcvbuf = osk->sk_rcvbuf; nsk->sk_sndtimeo = osk->sk_sndtimeo; nsk->sk_rcvtimeo = osk->sk_rcvtimeo; - nsk->sk_mark = osk->sk_mark; + nsk->sk_mark = READ_ONCE(osk->sk_mark); nsk->sk_priority = osk->sk_priority; nsk->sk_rcvlowat = osk->sk_rcvlowat; nsk->sk_bound_dev_if = osk->sk_bound_dev_if; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e7728b57a8c70..10615878e3961 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -780,7 +780,7 @@ static int unix_set_peek_off(struct sock *sk, int val) if (mutex_lock_interruptible(&u->iolock)) return -EINTR; - sk->sk_peek_off = val; + WRITE_ONCE(sk->sk_peek_off, val); mutex_unlock(&u->iolock); return 0; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 396c63431e1f3..e9a3b0f724f18 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -640,7 +640,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies, ret = cfg80211_calc_short_ssid(ies, &ssid_elem, &s_ssid_tmp); if (ret) - return ret; + return 0; /* RNR IE may contain more than one NEIGHBOR_AP_INFO */ while (pos + sizeof(*ap_info) <= end) { diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 32dd55b9ce8a8..35e518eaaebae 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -505,7 +505,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, skb->dev = dev; skb->priority = xs->sk.sk_priority; - skb->mark = xs->sk.sk_mark; + skb->mark = READ_ONCE(xs->sk.sk_mark); skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr; skb->destructor = xsk_destruct_skb; diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 2c1427074a3bb..e1c526f97ce31 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -68,9 +67,6 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) int numa_node; u64 size; - if (!capable(CAP_NET_ADMIN)) - return ERR_PTR(-EPERM); - if (attr->max_entries == 0 || attr->key_size != 4 || attr->value_size != 4 || attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)) diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 408f5e55744ed..533697e2488f2 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 35279c220bd78..a3319965470a7 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -33,6 +33,7 @@ #include #include +#include #include #include #include diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 369e5de8558ff..662c83beb345e 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index e7617c9959c31..d6b405782b636 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2250,7 +2250,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, match = xfrm_selector_match(&pol->selector, fl, family); if (match) { - if ((sk->sk_mark & pol->mark.m) != pol->mark.v || + if ((READ_ONCE(sk->sk_mark) & pol->mark.m) != pol->mark.v || pol->if_id != if_id) { pol = NULL; goto out; diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 50e7a76d54550..d17da84d3929f 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -12,5 +12,6 @@ #include /* `bindgen` gets confused at certain things. */ +const size_t BINDINGS_ARCH_SLAB_MINALIGN = ARCH_SLAB_MINALIGN; const gfp_t BINDINGS_GFP_KERNEL = GFP_KERNEL; const gfp_t BINDINGS___GFP_ZERO = __GFP_ZERO; diff --git a/rust/kernel/allocator.rs b/rust/kernel/allocator.rs index 397a3dd57a9b1..9363b527be664 100644 --- a/rust/kernel/allocator.rs +++ b/rust/kernel/allocator.rs @@ -9,6 +9,36 @@ use crate::bindings; struct KernelAllocator; +/// Calls `krealloc` with a proper size to alloc a new object aligned to `new_layout`'s alignment. +/// +/// # Safety +/// +/// - `ptr` can be either null or a pointer which has been allocated by this allocator. +/// - `new_layout` must have a non-zero size. +unsafe fn krealloc_aligned(ptr: *mut u8, new_layout: Layout, flags: bindings::gfp_t) -> *mut u8 { + // Customized layouts from `Layout::from_size_align()` can have size < align, so pad first. + let layout = new_layout.pad_to_align(); + + let mut size = layout.size(); + + if layout.align() > bindings::BINDINGS_ARCH_SLAB_MINALIGN { + // The alignment requirement exceeds the slab guarantee, thus try to enlarge the size + // to use the "power-of-two" size/alignment guarantee (see comments in `kmalloc()` for + // more information). + // + // Note that `layout.size()` (after padding) is guaranteed to be a multiple of + // `layout.align()`, so `next_power_of_two` gives enough alignment guarantee. + size = size.next_power_of_two(); + } + + // SAFETY: + // - `ptr` is either null or a pointer returned from a previous `k{re}alloc()` by the + // function safety requirement. + // - `size` is greater than 0 since it's either a `layout.size()` (which cannot be zero + // according to the function safety requirement) or a result from `next_power_of_two()`. + unsafe { bindings::krealloc(ptr as *const core::ffi::c_void, size, flags) as *mut u8 } +} + unsafe impl GlobalAlloc for KernelAllocator { unsafe fn alloc(&self, layout: Layout) -> *mut u8 { // `krealloc()` is used instead of `kmalloc()` because the latter is @@ -30,10 +60,20 @@ static ALLOCATOR: KernelAllocator = KernelAllocator; // to extract the object file that has them from the archive. For the moment, // let's generate them ourselves instead. // +// Note: Although these are *safe* functions, they are called by the compiler +// with parameters that obey the same `GlobalAlloc` function safety +// requirements: size and align should form a valid layout, and size is +// greater than 0. +// // Note that `#[no_mangle]` implies exported too, nowadays. #[no_mangle] -fn __rust_alloc(size: usize, _align: usize) -> *mut u8 { - unsafe { bindings::krealloc(core::ptr::null(), size, bindings::GFP_KERNEL) as *mut u8 } +fn __rust_alloc(size: usize, align: usize) -> *mut u8 { + // SAFETY: See assumption above. + let layout = unsafe { Layout::from_size_align_unchecked(size, align) }; + + // SAFETY: `ptr::null_mut()` is null, per assumption above the size of `layout` is greater + // than 0. + unsafe { krealloc_aligned(ptr::null_mut(), layout, bindings::GFP_KERNEL) } } #[no_mangle] @@ -42,23 +82,27 @@ fn __rust_dealloc(ptr: *mut u8, _size: usize, _align: usize) { } #[no_mangle] -fn __rust_realloc(ptr: *mut u8, _old_size: usize, _align: usize, new_size: usize) -> *mut u8 { - unsafe { - bindings::krealloc( - ptr as *const core::ffi::c_void, - new_size, - bindings::GFP_KERNEL, - ) as *mut u8 - } +fn __rust_realloc(ptr: *mut u8, _old_size: usize, align: usize, new_size: usize) -> *mut u8 { + // SAFETY: See assumption above. + let new_layout = unsafe { Layout::from_size_align_unchecked(new_size, align) }; + + // SAFETY: Per assumption above, `ptr` is allocated by `__rust_*` before, and the size of + // `new_layout` is greater than 0. + unsafe { krealloc_aligned(ptr, new_layout, bindings::GFP_KERNEL) } } #[no_mangle] -fn __rust_alloc_zeroed(size: usize, _align: usize) -> *mut u8 { +fn __rust_alloc_zeroed(size: usize, align: usize) -> *mut u8 { + // SAFETY: See assumption above. + let layout = unsafe { Layout::from_size_align_unchecked(size, align) }; + + // SAFETY: `ptr::null_mut()` is null, per assumption above the size of `layout` is greater + // than 0. unsafe { - bindings::krealloc( - core::ptr::null(), - size, + krealloc_aligned( + ptr::null_mut(), + layout, bindings::GFP_KERNEL | bindings::__GFP_ZERO, - ) as *mut u8 + ) } } diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index ef1ed645097c6..ce0d1c7578348 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -56,10 +56,11 @@ double perf_pmu__cpu_slots_per_cycle(void) perf_pmu__pathname_scnprintf(path, sizeof(path), pmu->name, "caps/slots"); /* - * The value of slots is not greater than 32 bits, but sysfs__read_int - * can't read value with 0x prefix, so use sysfs__read_ull instead. + * The value of slots is not greater than 32 bits, but + * filename__read_int can't read value with 0x prefix, + * so use filename__read_ull instead. */ - sysfs__read_ull(path, &slots); + filename__read_ull(path, &slots); } return slots ? (double)slots : NAN; diff --git a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh index 00d2e0e2e0c28..319f36ebb9a40 100644 --- a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh +++ b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh @@ -4,6 +4,12 @@ set -e +# skip if there's no gcc +if ! [ -x "$(command -v gcc)" ]; then + echo "failed: no gcc compiler" + exit 2 +fi + temp_dir=$(mktemp -d /tmp/perf-uprobe-different-cu-sh.XXXXXXXXXX) cleanup() @@ -11,7 +17,7 @@ cleanup() trap - EXIT TERM INT if [[ "${temp_dir}" =~ ^/tmp/perf-uprobe-different-cu-sh.*$ ]]; then echo "--- Cleaning up ---" - perf probe -x ${temp_dir}/testfile -d foo + perf probe -x ${temp_dir}/testfile -d foo || true rm -f "${temp_dir}/"* rmdir "${temp_dir}" fi diff --git a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c index 8383a99f610fd..0adf8d9475cb2 100644 --- a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c +++ b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c @@ -171,7 +171,11 @@ static void test_unpriv_bpf_disabled_negative(struct test_unpriv_bpf_disabled *s prog_insns, prog_insn_cnt, &load_opts), -EPERM, "prog_load_fails"); - for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_BLOOM_FILTER; i++) + /* some map types require particular correct parameters which could be + * sanity-checked before enforcing -EPERM, so only validate that + * the simple ARRAY and HASH maps are failing with -EPERM + */ + for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_ARRAY; i++) ASSERT_EQ(bpf_map_create(i, NULL, sizeof(int), sizeof(int), 1, NULL), -EPERM, "map_create_fails"); diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c index 0e04f9fef9867..a148181641026 100644 --- a/tools/testing/selftests/net/so_incoming_cpu.c +++ b/tools/testing/selftests/net/so_incoming_cpu.c @@ -159,7 +159,7 @@ void create_clients(struct __test_metadata *_metadata, /* Make sure SYN will be processed on the i-th CPU * and finally distributed to the i-th listener. */ - sched_setaffinity(0, sizeof(cpu_set), &cpu_set); + ret = sched_setaffinity(0, sizeof(cpu_set), &cpu_set); ASSERT_EQ(ret, 0); for (j = 0; j < CLIENT_PER_SERVER; j++) { diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 4e4aa006004c8..a723da2532441 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -34,9 +34,17 @@ #include "../kselftest.h" #include "rseq.h" -static const ptrdiff_t *libc_rseq_offset_p; -static const unsigned int *libc_rseq_size_p; -static const unsigned int *libc_rseq_flags_p; +/* + * Define weak versions to play nice with binaries that are statically linked + * against a libc that doesn't support registering its own rseq. + */ +__weak ptrdiff_t __rseq_offset; +__weak unsigned int __rseq_size; +__weak unsigned int __rseq_flags; + +static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset; +static const unsigned int *libc_rseq_size_p = &__rseq_size; +static const unsigned int *libc_rseq_flags_p = &__rseq_flags; /* Offset from the thread pointer to the rseq area. */ ptrdiff_t rseq_offset; @@ -155,9 +163,17 @@ unsigned int get_rseq_feature_size(void) static __attribute__((constructor)) void rseq_init(void) { - libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); - libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); - libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); + /* + * If the libc's registered rseq size isn't already valid, it may be + * because the binary is dynamically linked and not necessarily due to + * libc not having registered a restartable sequence. Try to find the + * symbols if that's the case. + */ + if (!*libc_rseq_size_p) { + libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); + libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); + libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); + } if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && *libc_rseq_size_p != 0) { /* rseq registration owned by glibc */ diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json index a44455372646a..08d4861c2e782 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json @@ -131,5 +131,30 @@ "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" ] + }, + { + "id": "3e1e", + "name": "Add taprio Qdisc with an invalid cycle-time", + "category": [ + "qdisc", + "taprio" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 8\" > /sys/bus/netdevsim/new_device", + "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 clockid CLOCK_TAI cycle-time 4294967296 || /bin/true", + "$IP link set dev $ETH up", + "$IP addr add 10.10.10.10/24 dev $ETH" + ], + "cmdUnderTest": "/bin/true", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc taprio 1: root refcnt", + "matchCount": "0", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] } ] diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile index 43a254f0e14dd..21a98ba565ab5 100644 --- a/tools/testing/vsock/Makefile +++ b/tools/testing/vsock/Makefile @@ -8,5 +8,5 @@ vsock_perf: vsock_perf.o CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE .PHONY: all test clean clean: - ${RM} *.o *.d vsock_test vsock_diag_test + ${RM} *.o *.d vsock_test vsock_diag_test vsock_perf -include *.d