diff --git a/MAINTAINERS b/MAINTAINERS index 4f7ac27d8651..d1aeebb59e6a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8704,8 +8704,10 @@ L: isdn4linux@listserv.isdn4linux.de (subscribers-only) L: netdev@vger.kernel.org W: http://www.isdn4linux.de S: Maintained -F: drivers/isdn/mISDN -F: drivers/isdn/hardware +F: drivers/isdn/mISDN/ +F: drivers/isdn/hardware/ +F: drivers/isdn/Kconfig +F: drivers/isdn/Makefile ISDN/CAPI SUBSYSTEM M: Karsten Keil diff --git a/Makefile b/Makefile index b6c151fd5227..2f55d377f0db 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 18 +SUBLEVEL = 19 EXTRAVERSION = NAME = Kleptomaniac Octopus diff --git a/arch/Kconfig b/arch/Kconfig index 5f8a5d84dbbe..43102756304c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -396,9 +396,6 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE config HAVE_RCU_TABLE_FREE bool -config HAVE_RCU_TABLE_NO_INVALIDATE - bool - config HAVE_MMU_GATHER_PAGE_SIZE bool diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 40002416efec..8e995ec796c8 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -14,13 +14,25 @@ #include /* arm64 compatibility macros */ +#define PSR_AA32_MODE_FIQ FIQ_MODE +#define PSR_AA32_MODE_SVC SVC_MODE #define PSR_AA32_MODE_ABT ABT_MODE #define PSR_AA32_MODE_UND UND_MODE #define PSR_AA32_T_BIT PSR_T_BIT +#define PSR_AA32_F_BIT PSR_F_BIT #define PSR_AA32_I_BIT PSR_I_BIT #define PSR_AA32_A_BIT PSR_A_BIT #define PSR_AA32_E_BIT PSR_E_BIT #define PSR_AA32_IT_MASK PSR_IT_MASK +#define PSR_AA32_GE_MASK 0x000f0000 +#define PSR_AA32_DIT_BIT 0x00200000 +#define PSR_AA32_PAN_BIT 0x00400000 +#define PSR_AA32_SSBS_BIT 0x00800000 +#define PSR_AA32_Q_BIT PSR_Q_BIT +#define PSR_AA32_V_BIT PSR_V_BIT +#define PSR_AA32_C_BIT PSR_C_BIT +#define PSR_AA32_Z_BIT PSR_Z_BIT +#define PSR_AA32_N_BIT PSR_N_BIT unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); @@ -41,6 +53,11 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) *__vcpu_spsr(vcpu) = v; } +static inline unsigned long host_spsr_to_spsr32(unsigned long spsr) +{ + return spsr; +} + static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu, u8 reg_num) { @@ -177,6 +194,11 @@ static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_SSE; } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return false; +} + static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT; diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h index 7c0eddb0adb2..32fbf82e3ebc 100644 --- a/arch/arm/include/asm/kvm_mmio.h +++ b/arch/arm/include/asm/kvm_mmio.h @@ -14,6 +14,8 @@ struct kvm_decode { unsigned long rt; bool sign_extend; + /* Not used on 32-bit arm */ + bool sixty_four; }; void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S index b408fa56eb89..6922dd8d3e2d 100644 --- a/arch/arm/mach-tegra/sleep-tegra30.S +++ b/arch/arm/mach-tegra/sleep-tegra30.S @@ -370,6 +370,14 @@ _pll_m_c_x_done: pll_locked r1, r0, CLK_RESET_PLLC_BASE pll_locked r1, r0, CLK_RESET_PLLX_BASE + tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 + cmp r1, #TEGRA30 + beq 1f + ldr r1, [r0, #CLK_RESET_PLLP_BASE] + bic r1, r1, #(1<<31) @ disable PllP bypass + str r1, [r0, #CLK_RESET_PLLP_BASE] +1: + mov32 r7, TEGRA_TMRUS_BASE ldr r1, [r7] add r1, r1, #LOCK_DELAY @@ -630,7 +638,10 @@ tegra30_switch_cpu_to_clk32k: str r0, [r4, #PMC_PLLP_WB0_OVERRIDE] /* disable PLLP, PLLA, PLLC and PLLX */ + tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 + cmp r1, #TEGRA30 ldr r0, [r5, #CLK_RESET_PLLP_BASE] + orrne r0, r0, #(1 << 31) @ enable PllP bypass on fast cluster bic r0, r0, #(1 << 30) str r0, [r5, #CLK_RESET_PLLP_BASE] ldr r0, [r5, #CLK_RESET_PLLA_BASE] diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 7d042d5c43e3..27576c7b836e 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -221,7 +221,7 @@ EXPORT_SYMBOL(arm_coherent_dma_ops); static int __dma_supported(struct device *dev, u64 mask, bool warn) { - unsigned long max_dma_pfn = min(max_pfn, arm_dma_pfn_limit); + unsigned long max_dma_pfn = min(max_pfn - 1, arm_dma_pfn_limit); /* * Translate the device's DMA mask to a PFN limit. This diff --git a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi index 501a7330dbc8..522d3ef72df5 100644 --- a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi @@ -73,6 +73,7 @@ regulator-always-on; regulator-boot-on; regulator-name = "vdd_apc"; + regulator-initial-mode = <1>; regulator-min-microvolt = <1048000>; regulator-max-microvolt = <1384000>; }; diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 70b1469783f9..24bc0a3f26e2 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -261,7 +261,7 @@ static int ghash_setkey(struct crypto_shash *tfm, static struct shash_alg ghash_alg[] = {{ .base.cra_name = "ghash", .base.cra_driver_name = "ghash-neon", - .base.cra_priority = 100, + .base.cra_priority = 150, .base.cra_blocksize = GHASH_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct ghash_key), .base.cra_module = THIS_MODULE, diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 063c964af705..48bfbf70dbb0 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -36,7 +36,7 @@ static inline void local_daif_mask(void) trace_hardirqs_off(); } -static inline unsigned long local_daif_save(void) +static inline unsigned long local_daif_save_flags(void) { unsigned long flags; @@ -48,6 +48,15 @@ static inline unsigned long local_daif_save(void) flags |= PSR_I_BIT; } + return flags; +} + +static inline unsigned long local_daif_save(void) +{ + unsigned long flags; + + flags = local_daif_save_flags(); + local_daif_mask(); return flags; diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index d69c1efc63e7..6ff84f1f3b4c 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -204,6 +204,38 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; } +/* + * The layout of SPSR for an AArch32 state is different when observed from an + * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32 + * view given an AArch64 view. + * + * In ARM DDI 0487E.a see: + * + * - The AArch64 view (SPSR_EL2) in section C5.2.18, page C5-426 + * - The AArch32 view (SPSR_abt) in section G8.2.126, page G8-6256 + * - The AArch32 view (SPSR_und) in section G8.2.132, page G8-6280 + * + * Which show the following differences: + * + * | Bit | AA64 | AA32 | Notes | + * +-----+------+------+-----------------------------| + * | 24 | DIT | J | J is RES0 in ARMv8 | + * | 21 | SS | DIT | SS doesn't exist in AArch32 | + * + * ... and all other bits are (currently) common. + */ +static inline unsigned long host_spsr_to_spsr32(unsigned long spsr) +{ + const unsigned long overlap = BIT(24) | BIT(21); + unsigned long dit = !!(spsr & PSR_AA32_DIT_BIT); + + spsr &= ~overlap; + + spsr |= dit << 21; + + return spsr; +} + static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) { u32 mode; @@ -263,6 +295,11 @@ static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF); +} + static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h index 02b5c48fd467..b204501a0c39 100644 --- a/arch/arm64/include/asm/kvm_mmio.h +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -10,13 +10,11 @@ #include #include -/* - * This is annoying. The mmio code requires this, even if we don't - * need any decoding. To be fixed. - */ struct kvm_decode { unsigned long rt; bool sign_extend; + /* Witdth of the register accessed by the faulting instruction is 64-bits */ + bool sixty_four; }; void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index fbebb411ae20..bf57308fcd63 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -62,6 +62,7 @@ #define PSR_AA32_I_BIT 0x00000080 #define PSR_AA32_A_BIT 0x00000100 #define PSR_AA32_E_BIT 0x00000200 +#define PSR_AA32_PAN_BIT 0x00400000 #define PSR_AA32_SSBS_BIT 0x00800000 #define PSR_AA32_DIT_BIT 0x01000000 #define PSR_AA32_Q_BIT 0x08000000 diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 7ed9294e2004..d1bb5b69f1ce 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -49,6 +49,7 @@ #define PSR_SSBS_BIT 0x00001000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 +#define PSR_DIT_BIT 0x01000000 #define PSR_V_BIT 0x10000000 #define PSR_C_BIT 0x20000000 #define PSR_Z_BIT 0x40000000 diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 3a58e9db5cfe..a100483b47c4 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -274,7 +274,7 @@ int apei_claim_sea(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES)) return err; - current_flags = arch_local_save_flags(); + current_flags = local_daif_save_flags(); /* * SEA can interrupt SError, mask it and describe this as an NMI so diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index a9d25a305af5..a364a4ad5479 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -14,9 +14,6 @@ #include #include -#define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \ - PSR_I_BIT | PSR_D_BIT) - #define CURRENT_EL_SP_EL0_VECTOR 0x0 #define CURRENT_EL_SP_ELx_VECTOR 0x200 #define LOWER_EL_AArch64_VECTOR 0x400 @@ -50,6 +47,69 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; } +/* + * When an exception is taken, most PSTATE fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all + * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx + * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. + * + * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. + * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. + * + * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from + * MSB to LSB. + */ +static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu) +{ + unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_N_BIT); + new |= (old & PSR_Z_BIT); + new |= (old & PSR_C_BIT); + new |= (old & PSR_V_BIT); + + // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) + + new |= (old & PSR_DIT_BIT); + + // PSTATE.UAO is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D5-2579. + + // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0 + // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page D5-2578. + new |= (old & PSR_PAN_BIT); + if (!(sctlr & SCTLR_EL1_SPAN)) + new |= PSR_PAN_BIT; + + // PSTATE.SS is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D2-2452. + + // PSTATE.IL is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D1-2306. + + // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64 + // See ARM DDI 0487E.a, page D13-3258 + if (sctlr & SCTLR_ELx_DSSBS) + new |= PSR_SSBS_BIT; + + // PSTATE.BTYPE is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, pages D1-2293 to D1-2294. + + new |= PSR_D_BIT; + new |= PSR_A_BIT; + new |= PSR_I_BIT; + new |= PSR_F_BIT; + + new |= PSR_MODE_EL1h; + + return new; +} + static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); @@ -59,7 +119,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); vcpu_write_spsr(vcpu, cpsr); vcpu_write_sys_reg(vcpu, addr, FAR_EL1); @@ -94,7 +154,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); vcpu_write_spsr(vcpu, cpsr); /* diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink index 4eea4188cb20..13e0beb9eee3 100644 --- a/arch/mips/Makefile.postlink +++ b/arch/mips/Makefile.postlink @@ -12,7 +12,7 @@ __archpost: include scripts/Kbuild.include CMD_RELOCS = arch/mips/boot/tools/relocs -quiet_cmd_relocs = RELOCS $@ +quiet_cmd_relocs = RELOCS $@ cmd_relocs = $(CMD_RELOCS) $@ # `@true` prevents complaint when there is nothing to be done diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile index 528bd73d530a..4ed45ade32a1 100644 --- a/arch/mips/boot/Makefile +++ b/arch/mips/boot/Makefile @@ -123,7 +123,7 @@ $(obj)/vmlinux.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS targets += vmlinux.its targets += vmlinux.gz.its targets += vmlinux.bz2.its -targets += vmlinux.lzmo.its +targets += vmlinux.lzma.its targets += vmlinux.lzo.its quiet_cmd_cpp_its_S = ITS $@ diff --git a/arch/mips/kernel/syscalls/Makefile b/arch/mips/kernel/syscalls/Makefile index a3d4bec695c6..6efb2f6889a7 100644 --- a/arch/mips/kernel/syscalls/Makefile +++ b/arch/mips/kernel/syscalls/Makefile @@ -18,7 +18,7 @@ quiet_cmd_syshdr = SYSHDR $@ '$(syshdr_pfx_$(basetarget))' \ '$(syshdr_offset_$(basetarget))' -quiet_cmd_sysnr = SYSNR $@ +quiet_cmd_sysnr = SYSNR $@ cmd_sysnr = $(CONFIG_SHELL) '$(sysnr)' '$<' '$@' \ '$(sysnr_abis_$(basetarget))' \ '$(sysnr_pfx_$(basetarget))' \ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3e56c9c2f16e..2b1033f13210 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -221,8 +221,7 @@ config PPC select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select HAVE_RCU_TABLE_FREE if SMP - select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE + select HAVE_RCU_TABLE_FREE select HAVE_MMU_GATHER_PAGE_SIZE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN @@ -237,6 +236,7 @@ config PPC select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE select NEED_SG_DMA_LENGTH select OF + select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE select OF_EARLY_FLATTREE select OLD_SIGACTION if PPC32 select OLD_SIGSUSPEND diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c index 1699e9531552..00c4d843a023 100644 --- a/arch/powerpc/boot/4xx.c +++ b/arch/powerpc/boot/4xx.c @@ -228,7 +228,7 @@ void ibm4xx_denali_fixup_memsize(void) dpath = 8; /* 64 bits */ /* get address pins (rows) */ - val = SDRAM0_READ(DDR0_42); + val = SDRAM0_READ(DDR0_42); row = DDR_GET_VAL(val, DDR_APIN, DDR_APIN_SHIFT); if (row > max_row) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index f9dc597b0b86..91c8f1d9bcee 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -102,11 +102,13 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) isync(); /* Context sync required after mtsrin() */ } -static inline void allow_user_access(void __user *to, const void __user *from, u32 size) +static __always_inline void allow_user_access(void __user *to, const void __user *from, + u32 size, unsigned long dir) { u32 addr, end; - if (__builtin_constant_p(to) && to == NULL) + BUILD_BUG_ON(!__builtin_constant_p(dir)); + if (!(dir & KUAP_WRITE)) return; addr = (__force u32)to; @@ -119,11 +121,16 @@ static inline void allow_user_access(void __user *to, const void __user *from, u kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */ } -static inline void prevent_user_access(void __user *to, const void __user *from, u32 size) +static __always_inline void prevent_user_access(void __user *to, const void __user *from, + u32 size, unsigned long dir) { u32 addr = (__force u32)to; u32 end = min(addr + size, TASK_SIZE); + BUILD_BUG_ON(!__builtin_constant_p(dir)); + if (!(dir & KUAP_WRITE)) + return; + if (!addr || addr >= TASK_SIZE || !size) return; @@ -131,12 +138,17 @@ static inline void prevent_user_access(void __user *to, const void __user *from, kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */ } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { + unsigned long begin = regs->kuap & 0xf0000000; + unsigned long end = regs->kuap << 28; + if (!is_write) return false; - return WARN(!regs->kuap, "Bug: write fault blocked by segment registers !"); + return WARN(address < begin || address >= end, + "Bug: write fault blocked by segment registers !"); } #endif /* CONFIG_PPC_KUAP */ diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index 998317702630..dc5c039eb28e 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -49,7 +49,6 @@ static inline void pgtable_free(void *table, unsigned index_size) #define get_hugepd_cache_index(x) (x) -#ifdef CONFIG_SMP static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) { @@ -66,13 +65,6 @@ static inline void __tlb_remove_table(void *_table) pgtable_free(table, shift); } -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, - void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index f254de956d6a..c8d1076e0ebb 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -77,25 +77,27 @@ static inline void set_kuap(unsigned long value) isync(); } -static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size) +static __always_inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) { // This is written so we can resolve to a single case at build time - if (__builtin_constant_p(to) && to == NULL) + BUILD_BUG_ON(!__builtin_constant_p(dir)); + if (dir == KUAP_READ) set_kuap(AMR_KUAP_BLOCK_WRITE); - else if (__builtin_constant_p(from) && from == NULL) + else if (dir == KUAP_WRITE) set_kuap(AMR_KUAP_BLOCK_READ); else set_kuap(0); } static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size) + unsigned long size, unsigned long dir) { set_kuap(AMR_KUAP_BLOCKED); } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index d5a44912902f..cae9e814593a 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -19,9 +19,7 @@ extern struct vmemmap_backing *vmemmap_list; extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long); extern void pmd_fragment_free(unsigned long *); extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); -#ifdef CONFIG_SMP extern void __tlb_remove_table(void *_table); -#endif void pte_frag_destroy(void *pte_frag); static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm) diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index eea28ca679db..bc7d9d06a6d9 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -35,7 +35,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, { int oldval = 0, ret; - allow_write_to_user(uaddr, sizeof(*uaddr)); + allow_read_write_user(uaddr, uaddr, sizeof(*uaddr)); pagefault_disable(); switch (op) { @@ -62,7 +62,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, *oval = oldval; - prevent_write_to_user(uaddr, sizeof(*uaddr)); + prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr)); return ret; } @@ -76,7 +76,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(uaddr, sizeof(u32))) return -EFAULT; - allow_write_to_user(uaddr, sizeof(*uaddr)); + allow_read_write_user(uaddr, uaddr, sizeof(*uaddr)); + __asm__ __volatile__ ( PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ @@ -97,7 +98,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "cc", "memory"); *uval = prev; - prevent_write_to_user(uaddr, sizeof(*uaddr)); + prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr)); + return ret; } diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 5b5e39643a27..94f24928916a 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -2,6 +2,10 @@ #ifndef _ASM_POWERPC_KUP_H_ #define _ASM_POWERPC_KUP_H_ +#define KUAP_READ 1 +#define KUAP_WRITE 2 +#define KUAP_READ_WRITE (KUAP_READ | KUAP_WRITE) + #ifdef CONFIG_PPC64 #include #endif @@ -42,32 +46,48 @@ void setup_kuap(bool disabled); #else static inline void setup_kuap(bool disabled) { } static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size) { } + unsigned long size, unsigned long dir) { } static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size) { } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) { return false; } + unsigned long size, unsigned long dir) { } +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + return false; +} #endif /* CONFIG_PPC_KUAP */ static inline void allow_read_from_user(const void __user *from, unsigned long size) { - allow_user_access(NULL, from, size); + allow_user_access(NULL, from, size, KUAP_READ); } static inline void allow_write_to_user(void __user *to, unsigned long size) { - allow_user_access(to, NULL, size); + allow_user_access(to, NULL, size, KUAP_WRITE); +} + +static inline void allow_read_write_user(void __user *to, const void __user *from, + unsigned long size) +{ + allow_user_access(to, from, size, KUAP_READ_WRITE); } static inline void prevent_read_from_user(const void __user *from, unsigned long size) { - prevent_user_access(NULL, from, size); + prevent_user_access(NULL, from, size, KUAP_READ); } static inline void prevent_write_to_user(void __user *to, unsigned long size) { - prevent_user_access(to, NULL, size); + prevent_user_access(to, NULL, size, KUAP_WRITE); +} + +static inline void prevent_read_write_user(void __user *to, const void __user *from, + unsigned long size) +{ + prevent_user_access(to, from, size, KUAP_READ_WRITE); } #endif /* !__ASSEMBLY__ */ -#endif /* _ASM_POWERPC_KUP_H_ */ +#endif /* _ASM_POWERPC_KUAP_H_ */ diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 1c3133b5f86a..6fe97465e350 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -34,18 +34,19 @@ #include static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size) + unsigned long size, unsigned long dir) { mtspr(SPRN_MD_AP, MD_APG_INIT); } static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size) + unsigned long size, unsigned long dir) { mtspr(SPRN_MD_AP, MD_APG_KUAP); } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000), "Bug: fault blocked by AP register !"); diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h index 332b13b4ecdb..29c43665a753 100644 --- a/arch/powerpc/include/asm/nohash/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/pgalloc.h @@ -46,7 +46,6 @@ static inline void pgtable_free(void *table, int shift) #define get_hugepd_cache_index(x) (x) -#ifdef CONFIG_SMP static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) { unsigned long pgf = (unsigned long)table; @@ -64,13 +63,6 @@ static inline void __tlb_remove_table(void *_table) pgtable_free(table, shift); } -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif - static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index b2c0be93929d..7f3a8b902325 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -26,6 +26,17 @@ #define tlb_flush tlb_flush extern void tlb_flush(struct mmu_gather *tlb); +/* + * book3s: + * Hash does not use the linux page-tables, so we can avoid + * the TLB invalidate for page-table freeing, Radix otoh does use the + * page-tables and needs the TLBI. + * + * nohash: + * We still do TLB invalidate in the __pte_free_tlb routine before we + * add the page table pages to mmu gather table batch. + */ +#define tlb_needs_table_invalidate() radix_enabled() /* Get the generic bits... */ #include diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index c92fe7fe9692..cafad1960e76 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -313,9 +313,9 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) unsigned long ret; barrier_nospec(); - allow_user_access(to, from, n); + allow_read_write_user(to, from, n); ret = __copy_tofrom_user(to, from, n); - prevent_user_access(to, from, n); + prevent_read_write_user(to, from, n); return ret; } #endif /* __powerpc64__ */ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index d60908ea37fb..59bb4f4ae316 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -179,7 +179,7 @@ transfer_to_handler: 2: /* if from kernel, check interrupted DOZE/NAP mode and * check for stack overflow */ - kuap_save_and_lock r11, r12, r9, r2, r0 + kuap_save_and_lock r11, r12, r9, r2, r6 addi r2, r12, -THREAD lwz r9,KSP_LIMIT(r12) cmplw r1,r9 /* if r1 <= ksp_limit */ @@ -284,6 +284,7 @@ reenable_mmu: rlwinm r9,r9,0,~MSR_EE lwz r12,_LINK(r11) /* and return to address in LR */ kuap_restore r11, r2, r3, r4, r5 + lwz r2, GPR2(r11) b fast_exception_return #endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 709cf1fd4cf4..36abbe3c346d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2354,7 +2354,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, mutex_unlock(&kvm->lock); if (!vcore) - goto free_vcpu; + goto uninit_vcpu; spin_lock(&vcore->lock); ++vcore->num_threads; @@ -2371,6 +2371,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, return vcpu; +uninit_vcpu: + kvm_vcpu_uninit(vcpu); free_vcpu: kmem_cache_free(kvm_vcpu_cache, vcpu); out: diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index cc65af8fe6f7..3f6ad3f58628 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1769,10 +1769,12 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, err = kvmppc_mmu_init(vcpu); if (err < 0) - goto uninit_vcpu; + goto free_shared_page; return vcpu; +free_shared_page: + free_page((unsigned long)vcpu->arch.shared); uninit_vcpu: kvm_vcpu_uninit(vcpu); free_shadow_vcpu: diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 5a3373e06e60..235d57d6c205 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -638,7 +638,7 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, srcu_idx = srcu_read_lock(&kvm->srcu); gfn = gpa_to_gfn(kvm_eq.qaddr); - page_size = kvm_host_page_size(kvm, gfn); + page_size = kvm_host_page_size(vcpu, gfn); if (1ull << kvm_eq.qshift > page_size) { srcu_read_unlock(&kvm->srcu, srcu_idx); pr_warn("Incompatible host page size %lx!\n", page_size); diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 75483b40fcb1..2bf7e1b4fd82 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -378,7 +378,6 @@ static inline void pgtable_free(void *table, int index) } } -#ifdef CONFIG_SMP void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) { unsigned long pgf = (unsigned long)table; @@ -395,12 +394,6 @@ void __tlb_remove_table(void *_table) return pgtable_free(table, index); } -#else -void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) -{ - return pgtable_free(table, index); -} -#endif #ifdef CONFIG_PROC_FS atomic_long_t direct_pages_count[MMU_PAGE_COUNT]; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 8432c281de92..9298905cfe74 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -233,7 +233,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, // Read/write fault in a valid region (the exception table search passed // above), but blocked by KUAP is bad, it can never succeed. - if (bad_kuap_fault(regs, is_write)) + if (bad_kuap_fault(regs, address, is_write)) return true; // What's left? Kernel fault on user in well defined regions (extable diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 2f9ddc29c535..c73205172447 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -173,10 +173,12 @@ static void dump_addr(struct pg_state *st, unsigned long addr) static void note_prot_wx(struct pg_state *st, unsigned long addr) { + pte_t pte = __pte(st->current_flags); + if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx) return; - if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == pgprot_val(PAGE_KERNEL_X))) + if (!pte_write(pte) || !pte_exec(pte)) return; WARN_ONCE(1, "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n", diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 8e700390f3d6..4c3af2e9eb8e 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -360,8 +360,10 @@ static bool lmb_is_removable(struct drmem_lmb *lmb) for (i = 0; i < scns_per_block; i++) { pfn = PFN_DOWN(phys_addr); - if (!pfn_present(pfn)) + if (!pfn_present(pfn)) { + phys_addr += MIN_MEMORY_BLOCK_SIZE; continue; + } rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION); phys_addr += MIN_MEMORY_BLOCK_SIZE; diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index d83364ebc5c5..8057aafd5f5e 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1894,15 +1894,14 @@ static void dump_300_sprs(void) printf("pidr = %.16lx tidr = %.16lx\n", mfspr(SPRN_PID), mfspr(SPRN_TIDR)); - printf("asdr = %.16lx psscr = %.16lx\n", - mfspr(SPRN_ASDR), hv ? mfspr(SPRN_PSSCR) - : mfspr(SPRN_PSSCR_PR)); + printf("psscr = %.16lx\n", + hv ? mfspr(SPRN_PSSCR) : mfspr(SPRN_PSSCR_PR)); if (!hv) return; - printf("ptcr = %.16lx\n", - mfspr(SPRN_PTCR)); + printf("ptcr = %.16lx asdr = %.16lx\n", + mfspr(SPRN_PTCR), mfspr(SPRN_ASDR)); #endif } diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c index 7fbf56aab661..e2279fed8f56 100644 --- a/arch/riscv/net/bpf_jit_comp.c +++ b/arch/riscv/net/bpf_jit_comp.c @@ -120,6 +120,11 @@ static bool seen_reg(int reg, struct rv_jit_context *ctx) return false; } +static void mark_fp(struct rv_jit_context *ctx) +{ + __set_bit(RV_CTX_F_SEEN_S5, &ctx->flags); +} + static void mark_call(struct rv_jit_context *ctx) { __set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags); @@ -596,7 +601,8 @@ static void __build_epilogue(u8 reg, struct rv_jit_context *ctx) emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx); /* Set return value. */ - emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx); + if (reg == RV_REG_RA) + emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx); emit(rv_jalr(RV_REG_ZERO, reg, 0), ctx); } @@ -1426,6 +1432,10 @@ static void build_prologue(struct rv_jit_context *ctx) { int stack_adjust = 0, store_offset, bpf_stack_adjust; + bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); + if (bpf_stack_adjust) + mark_fp(ctx); + if (seen_reg(RV_REG_RA, ctx)) stack_adjust += 8; stack_adjust += 8; /* RV_REG_FP */ @@ -1443,7 +1453,6 @@ static void build_prologue(struct rv_jit_context *ctx) stack_adjust += 8; stack_adjust = round_up(stack_adjust, 16); - bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); stack_adjust += bpf_stack_adjust; store_offset = stack_adjust - 8; diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 823578c6b9e2..3f5cb55cde35 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -33,6 +33,8 @@ #define ARCH_HAS_PREPARE_HUGEPAGE #define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA + #include #ifndef __ASSEMBLY__ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d047e846e1b9..756c627f7e54 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2863,9 +2863,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 | CR14_UNUSED_33 | CR14_EXTERNAL_DAMAGE_SUBMASK; - /* make sure the new fpc will be lazily loaded */ - save_fpu_regs(); - current->thread.fpu.fpc = 0; + vcpu->run->s.regs.fpc = 0; vcpu->arch.sie_block->gbea = 1; vcpu->arch.sie_block->pp = 0; vcpu->arch.sie_block->fpf &= ~FPF_BPBC; @@ -4354,7 +4352,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, switch (ioctl) { case KVM_S390_STORE_STATUS: idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvm_s390_vcpu_store_status(vcpu, arg); + r = kvm_s390_store_status_unloaded(vcpu, arg); srcu_read_unlock(&vcpu->kvm->srcu, idx); break; case KVM_S390_SET_INITIAL_PSW: { diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index b0246c705a19..5674710a4841 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -2,7 +2,7 @@ /* * IBM System z Huge TLB Page Support for Kernel. * - * Copyright IBM Corp. 2007,2016 + * Copyright IBM Corp. 2007,2020 * Author(s): Gerald Schaefer */ @@ -11,6 +11,9 @@ #include #include +#include +#include +#include /* * If the bit selected by single-bit bitmask "a" is set within "x", move @@ -267,3 +270,98 @@ static __init int setup_hugepagesz(char *opt) return 1; } __setup("hugepagesz=", setup_hugepagesz); + +static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; + + info.flags = 0; + info.length = len; + info.low_limit = current->mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + return vm_unmapped_area(&info); +} + +static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, + unsigned long addr0, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; + unsigned long addr; + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = max(PAGE_SIZE, mmap_min_addr); + info.high_limit = current->mm->mmap_base; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + addr = vm_unmapped_area(&info); + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + addr = vm_unmapped_area(&info); + } + + return addr; +} + +unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc; + + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (len > TASK_SIZE - mmap_min_addr) + return -ENOMEM; + + if (flags & MAP_FIXED) { + if (prepare_hugepage_range(file, addr, len)) + return -EINVAL; + goto check_asce_limit; + } + + if (addr) { + addr = ALIGN(addr, huge_page_size(h)); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && + (!vma || addr + len <= vm_start_gap(vma))) + goto check_asce_limit; + } + + if (mm->get_unmapped_area == arch_get_unmapped_area) + addr = hugetlb_get_unmapped_area_bottomup(file, addr, len, + pgoff, flags); + else + addr = hugetlb_get_unmapped_area_topdown(file, addr, len, + pgoff, flags); + if (addr & ~PAGE_MASK) + return addr; + +check_asce_limit: + if (addr + len > current->mm->context.asce_limit && + addr + len <= TASK_SIZE) { + rc = crst_table_upgrade(mm, addr + len); + if (rc) + return (unsigned long) rc; + } + return addr; +} diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index eb24cb1afc11..18e9fb6fcf1b 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -65,7 +65,6 @@ config SPARC64 select HAVE_KRETPROBES select HAVE_KPROBES select HAVE_RCU_TABLE_FREE if SMP - select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE select HAVE_MEMBLOCK_NODE_MAP select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_DYNAMIC_FTRACE diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h index a2f3fa61ee36..8cb8f3833239 100644 --- a/arch/sparc/include/asm/tlb_64.h +++ b/arch/sparc/include/asm/tlb_64.h @@ -28,6 +28,15 @@ void flush_tlb_pending(void); #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) #define tlb_flush(tlb) flush_tlb_pending() +/* + * SPARC64's hardware TLB fill does not use the Linux page-tables + * and therefore we don't need a TLBI when freeing page-table pages. + */ + +#ifdef CONFIG_HAVE_RCU_TABLE_FREE +#define tlb_needs_table_invalidate() (false) +#endif + #include #endif /* _SPARC64_TLB_H */ diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h index 9d0d125500e2..084b8949ddff 100644 --- a/arch/sparc/include/uapi/asm/ipcbuf.h +++ b/arch/sparc/include/uapi/asm/ipcbuf.h @@ -15,19 +15,19 @@ struct ipc64_perm { - __kernel_key_t key; - __kernel_uid_t uid; - __kernel_gid_t gid; - __kernel_uid_t cuid; - __kernel_gid_t cgid; + __kernel_key_t key; + __kernel_uid32_t uid; + __kernel_gid32_t gid; + __kernel_uid32_t cuid; + __kernel_gid32_t cgid; #ifndef __arch64__ - unsigned short __pad0; + unsigned short __pad0; #endif - __kernel_mode_t mode; - unsigned short __pad1; - unsigned short seq; - unsigned long long __unused1; - unsigned long long __unused2; + __kernel_mode_t mode; + unsigned short __pad1; + unsigned short seq; + unsigned long long __unused1; + unsigned long long __unused2; }; #endif /* __SPARC_IPCBUF_H */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 2ebc17d9c72c..19e94af9cc5d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -140,6 +140,7 @@ extern void apic_soft_disable(void); extern void lapic_shutdown(void); extern void sync_Arb_IDs(void); extern void init_bsp_APIC(void); +extern void apic_intr_mode_select(void); extern void apic_intr_mode_init(void); extern void init_apic_mappings(void); void register_lapic_address(unsigned long address); @@ -188,6 +189,7 @@ static inline void disable_local_APIC(void) { } # define setup_secondary_APIC_clock x86_init_noop static inline void lapic_update_tsc_freq(void) { } static inline void init_bsp_APIC(void) { } +static inline void apic_intr_mode_select(void) { } static inline void apic_intr_mode_init(void) { } static inline void lapic_assign_system_vectors(void) { } static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } @@ -452,6 +454,14 @@ static inline void ack_APIC_irq(void) apic_eoi(); } + +static inline bool lapic_vector_set_in_irr(unsigned int vector) +{ + u32 irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + + return !!(irr & (1U << (vector % 32))); +} + static inline unsigned default_get_apic_id(unsigned long x) { unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4fc61483919a..c1ed054c103c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -380,12 +380,12 @@ struct kvm_mmu { void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); - int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, + int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err, bool prefault); void (*inject_page_fault)(struct kvm_vcpu *vcpu, struct x86_exception *fault); - gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, - struct x86_exception *exception); + gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa, + u32 access, struct x86_exception *exception); gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, struct x86_exception *exception); int (*sync_page)(struct kvm_vcpu *vcpu, @@ -667,10 +667,10 @@ struct kvm_vcpu_arch { bool pvclock_set_guest_stopped_request; struct { + u8 preempted; u64 msr_val; u64 last_steal; - struct gfn_to_hva_cache stime; - struct kvm_steal_time steal; + struct gfn_to_pfn_cache cache; } st; u64 tsc_offset; @@ -1128,6 +1128,7 @@ struct kvm_x86_ops { bool (*xsaves_supported)(void); bool (*umip_emulated)(void); bool (*pt_supported)(void); + bool (*pku_supported)(void); int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); void (*request_immediate_exit)(struct kvm_vcpu *vcpu); @@ -1450,7 +1451,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code, +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 19435858df5f..96d9cd208610 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -51,12 +51,14 @@ struct x86_init_resources { * are set up. * @intr_init: interrupt init code * @trap_init: platform specific trap setup + * @intr_mode_select: interrupt delivery mode selection * @intr_mode_init: interrupt delivery mode setup */ struct x86_init_irqs { void (*pre_vector_init)(void); void (*intr_init)(void); void (*trap_init)(void); + void (*intr_mode_select)(void); void (*intr_mode_init)(void); }; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 2b0faf86da1b..df891f874614 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -830,8 +830,17 @@ bool __init apic_needs_pit(void) if (!tsc_khz || !cpu_khz) return true; - /* Is there an APIC at all? */ - if (!boot_cpu_has(X86_FEATURE_APIC)) + /* Is there an APIC at all or is it disabled? */ + if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic) + return true; + + /* + * If interrupt delivery mode is legacy PIC or virtual wire without + * configuration, the local APIC timer wont be set up. Make sure + * that the PIT is initialized. + */ + if (apic_intr_mode == APIC_PIC || + apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG) return true; /* Virt guests may lack ARAT, but still have DEADLINE */ @@ -1322,7 +1331,7 @@ void __init sync_Arb_IDs(void) enum apic_intr_mode_id apic_intr_mode __ro_after_init; -static int __init apic_intr_mode_select(void) +static int __init __apic_intr_mode_select(void) { /* Check kernel option */ if (disable_apic) { @@ -1384,6 +1393,12 @@ static int __init apic_intr_mode_select(void) return APIC_SYMMETRIC_IO; } +/* Select the interrupt delivery mode for the BSP */ +void __init apic_intr_mode_select(void) +{ + apic_intr_mode = __apic_intr_mode_select(); +} + /* * An initial setup of the virtual wire mode. */ @@ -1440,8 +1455,6 @@ void __init apic_intr_mode_init(void) { bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT); - apic_intr_mode = apic_intr_mode_select(); - switch (apic_intr_mode) { case APIC_PIC: pr_info("APIC: Keep in PIC mode(8259)\n"); diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 7f7533462474..159bd0cb8548 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -23,10 +23,8 @@ static struct irq_domain *msi_default_domain; -static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) +static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg) { - struct irq_cfg *cfg = irqd_cfg(data); - msg->address_hi = MSI_ADDR_BASE_HI; if (x2apic_enabled()) @@ -47,6 +45,127 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) MSI_DATA_VECTOR(cfg->vector); } +static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) +{ + __irq_msi_compose_msg(irqd_cfg(data), msg); +} + +static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg) +{ + struct msi_msg msg[2] = { [1] = { }, }; + + __irq_msi_compose_msg(cfg, msg); + irq_data_get_irq_chip(irqd)->irq_write_msi_msg(irqd, msg); +} + +static int +msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force) +{ + struct irq_cfg old_cfg, *cfg = irqd_cfg(irqd); + struct irq_data *parent = irqd->parent_data; + unsigned int cpu; + int ret; + + /* Save the current configuration */ + cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd)); + old_cfg = *cfg; + + /* Allocate a new target vector */ + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) + return ret; + + /* + * For non-maskable and non-remapped MSI interrupts the migration + * to a different destination CPU and a different vector has to be + * done careful to handle the possible stray interrupt which can be + * caused by the non-atomic update of the address/data pair. + * + * Direct update is possible when: + * - The MSI is maskable (remapped MSI does not use this code path)). + * The quirk bit is not set in this case. + * - The new vector is the same as the old vector + * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up) + * - The new destination CPU is the same as the old destination CPU + */ + if (!irqd_msi_nomask_quirk(irqd) || + cfg->vector == old_cfg.vector || + old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR || + cfg->dest_apicid == old_cfg.dest_apicid) { + irq_msi_update_msg(irqd, cfg); + return ret; + } + + /* + * Paranoia: Validate that the interrupt target is the local + * CPU. + */ + if (WARN_ON_ONCE(cpu != smp_processor_id())) { + irq_msi_update_msg(irqd, cfg); + return ret; + } + + /* + * Redirect the interrupt to the new vector on the current CPU + * first. This might cause a spurious interrupt on this vector if + * the device raises an interrupt right between this update and the + * update to the final destination CPU. + * + * If the vector is in use then the installed device handler will + * denote it as spurious which is no harm as this is a rare event + * and interrupt handlers have to cope with spurious interrupts + * anyway. If the vector is unused, then it is marked so it won't + * trigger the 'No irq handler for vector' warning in do_IRQ(). + * + * This requires to hold vector lock to prevent concurrent updates to + * the affected vector. + */ + lock_vector_lock(); + + /* + * Mark the new target vector on the local CPU if it is currently + * unused. Reuse the VECTOR_RETRIGGERED state which is also used in + * the CPU hotplug path for a similar purpose. This cannot be + * undone here as the current CPU has interrupts disabled and + * cannot handle the interrupt before the whole set_affinity() + * section is done. In the CPU unplug case, the current CPU is + * about to vanish and will not handle any interrupts anymore. The + * vector is cleaned up when the CPU comes online again. + */ + if (IS_ERR_OR_NULL(this_cpu_read(vector_irq[cfg->vector]))) + this_cpu_write(vector_irq[cfg->vector], VECTOR_RETRIGGERED); + + /* Redirect it to the new vector on the local CPU temporarily */ + old_cfg.vector = cfg->vector; + irq_msi_update_msg(irqd, &old_cfg); + + /* Now transition it to the target CPU */ + irq_msi_update_msg(irqd, cfg); + + /* + * All interrupts after this point are now targeted at the new + * vector/CPU. + * + * Drop vector lock before testing whether the temporary assignment + * to the local CPU was hit by an interrupt raised in the device, + * because the retrigger function acquires vector lock again. + */ + unlock_vector_lock(); + + /* + * Check whether the transition raced with a device interrupt and + * is pending in the local APICs IRR. It is safe to do this outside + * of vector lock as the irq_desc::lock of this interrupt is still + * held and interrupts are disabled: The check is not accessing the + * underlying vector store. It's just checking the local APIC's + * IRR. + */ + if (lapic_vector_set_in_irr(cfg->vector)) + irq_data_get_irq_chip(irqd)->irq_retrigger(irqd); + + return ret; +} + /* * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, * which implement the MSI or MSI-X Capability Structure. @@ -58,6 +177,7 @@ static struct irq_chip pci_msi_controller = { .irq_ack = irq_chip_ack_parent, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_compose_msi_msg = irq_msi_compose_msg, + .irq_set_affinity = msi_set_affinity, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -146,6 +266,8 @@ void __init arch_init_msi_domain(struct irq_domain *parent) } if (!msi_default_domain) pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n"); + else + msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK; } #ifdef CONFIG_IRQ_REMAP diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index 3e20d322bc98..032509adf9de 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -115,11 +115,12 @@ void __init tsx_init(void) tsx_disable(); /* - * tsx_disable() will change the state of the - * RTM CPUID bit. Clear it here since it is now - * expected to be not set. + * tsx_disable() will change the state of the RTM and HLE CPUID + * bits. Clear them here since they are now expected to be not + * set. */ setup_clear_cpu_cap(X86_FEATURE_RTM); + setup_clear_cpu_cap(X86_FEATURE_HLE); } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) { /* @@ -131,10 +132,10 @@ void __init tsx_init(void) tsx_enable(); /* - * tsx_enable() will change the state of the - * RTM CPUID bit. Force it here since it is now - * expected to be set. + * tsx_enable() will change the state of the RTM and HLE CPUID + * bits. Force them here since they are now expected to be set. */ setup_force_cpu_cap(X86_FEATURE_RTM); + setup_force_cpu_cap(X86_FEATURE_HLE); } } diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 7ce29cee9f9e..d8673d8a779b 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -91,10 +91,18 @@ void __init hpet_time_init(void) static __init void x86_late_time_init(void) { + /* + * Before PIT/HPET init, select the interrupt mode. This is required + * to make the decision whether PIT should be initialized correct. + */ + x86_init.irqs.intr_mode_select(); + + /* Setup the legacy timers */ x86_init.timers.timer_init(); + /* - * After PIT/HPET timers init, select and setup - * the final interrupt mode for delivering IRQs. + * After PIT/HPET timers init, set up the final interrupt mode for + * delivering IRQs. */ x86_init.irqs.intr_mode_init(); tsc_init(); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 18a799c8fa28..1838b10a299c 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -58,6 +58,7 @@ struct x86_init_ops x86_init __initdata = { .pre_vector_init = init_ISA_irqs, .intr_init = native_init_IRQ, .trap_init = x86_init_noop, + .intr_mode_select = apic_intr_mode_select, .intr_mode_init = apic_intr_mode_init }, diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b1d5a8c94a57..6fa946f983c9 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -352,6 +352,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0; unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0; unsigned f_la57; + unsigned f_pku = kvm_x86_ops->pku_supported() ? F(PKU) : 0; /* cpuid 7.0.ebx */ const u32 kvm_cpuid_7_0_ebx_x86_features = @@ -363,7 +364,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) /* cpuid 7.0.ecx*/ const u32 kvm_cpuid_7_0_ecx_x86_features = - F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) | + F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/; @@ -392,6 +393,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) /* Set LA57 based on hardware capability. */ entry->ecx |= f_la57; entry->ecx |= f_umip; + entry->ecx |= f_pku; /* PKU is not yet implemented for shadow paging. */ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 698efb8c3897..37aa9ce29b33 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -22,6 +22,7 @@ #include "kvm_cache_regs.h" #include #include +#include #include #include @@ -1075,8 +1076,23 @@ static void fetch_register_operand(struct operand *op) } } +static void emulator_get_fpu(void) +{ + fpregs_lock(); + + fpregs_assert_state_consistent(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_return(); +} + +static void emulator_put_fpu(void) +{ + fpregs_unlock(); +} + static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) { + emulator_get_fpu(); switch (reg) { case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break; case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break; @@ -1098,11 +1114,13 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) #endif default: BUG(); } + emulator_put_fpu(); } static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) { + emulator_get_fpu(); switch (reg) { case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break; case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break; @@ -1124,10 +1142,12 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, #endif default: BUG(); } + emulator_put_fpu(); } static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) { + emulator_get_fpu(); switch (reg) { case 0: asm("movq %%mm0, %0" : "=m"(*data)); break; case 1: asm("movq %%mm1, %0" : "=m"(*data)); break; @@ -1139,10 +1159,12 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) case 7: asm("movq %%mm7, %0" : "=m"(*data)); break; default: BUG(); } + emulator_put_fpu(); } static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) { + emulator_get_fpu(); switch (reg) { case 0: asm("movq %0, %%mm0" : : "m"(*data)); break; case 1: asm("movq %0, %%mm1" : : "m"(*data)); break; @@ -1154,6 +1176,7 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) case 7: asm("movq %0, %%mm7" : : "m"(*data)); break; default: BUG(); } + emulator_put_fpu(); } static int em_fninit(struct x86_emulate_ctxt *ctxt) @@ -1161,7 +1184,9 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); + emulator_get_fpu(); asm volatile("fninit"); + emulator_put_fpu(); return X86EMUL_CONTINUE; } @@ -1172,7 +1197,9 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); + emulator_get_fpu(); asm volatile("fnstcw %0": "+m"(fcw)); + emulator_put_fpu(); ctxt->dst.val = fcw; @@ -1186,7 +1213,9 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); + emulator_get_fpu(); asm volatile("fnstsw %0": "+m"(fsw)); + emulator_put_fpu(); ctxt->dst.val = fsw; @@ -4094,8 +4123,12 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; + emulator_get_fpu(); + rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); + emulator_put_fpu(); + if (rc != X86EMUL_CONTINUE) return rc; @@ -4138,6 +4171,8 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; + emulator_get_fpu(); + if (size < __fxstate_size(16)) { rc = fxregs_fixup(&fx_state, size); if (rc != X86EMUL_CONTINUE) @@ -4153,6 +4188,8 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state)); out: + emulator_put_fpu(); + return rc; } @@ -5212,16 +5249,28 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) ctxt->ad_bytes = def_ad_bytes ^ 6; break; case 0x26: /* ES override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_ES; + break; case 0x2e: /* CS override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_CS; + break; case 0x36: /* SS override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_SS; + break; case 0x3e: /* DS override */ has_seg_override = true; - ctxt->seg_override = (ctxt->b >> 3) & 3; + ctxt->seg_override = VCPU_SREG_DS; break; case 0x64: /* FS override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_FS; + break; case 0x65: /* GS override */ has_seg_override = true; - ctxt->seg_override = ctxt->b & 7; + ctxt->seg_override = VCPU_SREG_GS; break; case 0x40 ... 0x4f: /* REX */ if (mode != X86EMUL_MODE_PROT64) @@ -5305,10 +5354,15 @@ done_prefixes: } break; case Escape: - if (ctxt->modrm > 0xbf) - opcode = opcode.u.esc->high[ctxt->modrm - 0xc0]; - else + if (ctxt->modrm > 0xbf) { + size_t size = ARRAY_SIZE(opcode.u.esc->high); + u32 index = array_index_nospec( + ctxt->modrm - 0xc0, size); + + opcode = opcode.u.esc->high[index]; + } else { opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7]; + } break; case InstrDual: if ((ctxt->modrm >> 6) == 3) @@ -5450,7 +5504,9 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt) { int rc; + emulator_get_fpu(); rc = asm_safe("fwait"); + emulator_put_fpu(); if (unlikely(rc != X86EMUL_CONTINUE)) return emulate_exception(ctxt, MF_VECTOR, 0, false); diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 23ff65504d7e..26408434b9bc 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -809,11 +809,12 @@ static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, u32 index, u64 *pdata) { struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + size_t size = ARRAY_SIZE(hv->hv_crash_param); - if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) + if (WARN_ON_ONCE(index >= size)) return -EINVAL; - *pdata = hv->hv_crash_param[index]; + *pdata = hv->hv_crash_param[array_index_nospec(index, size)]; return 0; } @@ -852,11 +853,12 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 index, u64 data) { struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + size_t size = ARRAY_SIZE(hv->hv_crash_param); - if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) + if (WARN_ON_ONCE(index >= size)) return -EINVAL; - hv->hv_crash_param[index] = data; + hv->hv_crash_param[array_index_nospec(index, size)] = data; return 0; } diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 8b38bb4868a6..629a09ca9860 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -460,10 +460,14 @@ static int picdev_write(struct kvm_pic *s, switch (addr) { case 0x20: case 0x21: + pic_lock(s); + pic_ioport_write(&s->pics[0], addr, data); + pic_unlock(s); + break; case 0xa0: case 0xa1: pic_lock(s); - pic_ioport_write(&s->pics[addr >> 7], addr, data); + pic_ioport_write(&s->pics[1], addr, data); pic_unlock(s); break; case 0x4d0: diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index d859ae8890d0..24a6905d60ee 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -68,13 +69,14 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, default: { u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; - u64 redir_content; + u64 redir_content = ~0ULL; - if (redir_index < IOAPIC_NUM_PINS) - redir_content = - ioapic->redirtbl[redir_index].bits; - else - redir_content = ~0ULL; + if (redir_index < IOAPIC_NUM_PINS) { + u32 index = array_index_nospec( + redir_index, IOAPIC_NUM_PINS); + + redir_content = ioapic->redirtbl[index].bits; + } result = (ioapic->ioregsel & 0x1) ? (redir_content >> 32) & 0xffffffff : @@ -291,6 +293,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) if (index >= IOAPIC_NUM_PINS) return; + index = array_index_nospec(index, IOAPIC_NUM_PINS); e = &ioapic->redirtbl[index]; mask_before = e->fields.mask; /* Preserve read-only fields */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index b29d00b661ff..15728971a430 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1926,15 +1926,20 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_LVTTHMR: case APIC_LVTPC: case APIC_LVT1: - case APIC_LVTERR: + case APIC_LVTERR: { /* TODO: Check vector */ + size_t size; + u32 index; + if (!kvm_apic_sw_enabled(apic)) val |= APIC_LVT_MASKED; - - val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4]; + size = ARRAY_SIZE(apic_lvt_mask); + index = array_index_nospec( + (reg - APIC_LVTT) >> 4, size); + val &= apic_lvt_mask[index]; kvm_lapic_set_reg(apic, reg, val); - break; + } case APIC_LVTT: if (!kvm_apic_sw_enabled(apic)) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2ce9da58611e..518100ea5ef4 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -418,22 +418,24 @@ static inline bool is_access_track_spte(u64 spte) * requires a full MMU zap). The flag is instead explicitly queried when * checking for MMIO spte cache hits. */ -#define MMIO_SPTE_GEN_MASK GENMASK_ULL(18, 0) +#define MMIO_SPTE_GEN_MASK GENMASK_ULL(17, 0) #define MMIO_SPTE_GEN_LOW_START 3 #define MMIO_SPTE_GEN_LOW_END 11 #define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \ MMIO_SPTE_GEN_LOW_START) -#define MMIO_SPTE_GEN_HIGH_START 52 -#define MMIO_SPTE_GEN_HIGH_END 61 +#define MMIO_SPTE_GEN_HIGH_START PT64_SECOND_AVAIL_BITS_SHIFT +#define MMIO_SPTE_GEN_HIGH_END 62 #define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \ MMIO_SPTE_GEN_HIGH_START) + static u64 generation_mmio_spte_mask(u64 gen) { u64 mask; WARN_ON(gen & ~MMIO_SPTE_GEN_MASK); + BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK); mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK; mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK; @@ -444,8 +446,6 @@ static u64 get_mmio_spte_generation(u64 spte) { u64 gen; - spte &= ~shadow_mmio_mask; - gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START; gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START; return gen; @@ -538,16 +538,20 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); static u8 kvm_get_shadow_phys_bits(void) { /* - * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected - * in CPU detection code, but MKTME treats those reduced bits as - * 'keyID' thus they are not reserved bits. Therefore for MKTME - * we should still return physical address bits reported by CPUID. + * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected + * in CPU detection code, but the processor treats those reduced bits as + * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at + * the physical address bits reported by CPUID. */ - if (!boot_cpu_has(X86_FEATURE_TME) || - WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008)) - return boot_cpu_data.x86_phys_bits; + if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008)) + return cpuid_eax(0x80000008) & 0xff; - return cpuid_eax(0x80000008) & 0xff; + /* + * Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with + * custom CPUID. Proceed with whatever the kernel found since these features + * aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008). + */ + return boot_cpu_data.x86_phys_bits; } static void kvm_mmu_reset_all_pte_masks(void) @@ -1282,12 +1286,12 @@ static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn, return __mmu_gfn_lpage_is_disallowed(gfn, level, slot); } -static int host_mapping_level(struct kvm *kvm, gfn_t gfn) +static int host_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn) { unsigned long page_size; int i, ret = 0; - page_size = kvm_host_page_size(kvm, gfn); + page_size = kvm_host_page_size(vcpu, gfn); for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { if (page_size >= KVM_HPAGE_SIZE(i)) @@ -1337,7 +1341,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, if (unlikely(*force_pt_level)) return PT_PAGE_TABLE_LEVEL; - host_level = host_mapping_level(vcpu->kvm, large_gfn); + host_level = host_mapping_level(vcpu, large_gfn); if (host_level == PT_PAGE_TABLE_LEVEL) return host_level; @@ -3528,7 +3532,7 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte) * - true: let the vcpu to access on the same address again. * - false: let the real page fault path to fix it. */ -static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, +static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int level, u32 error_code) { struct kvm_shadow_walk_iterator iterator; @@ -3548,7 +3552,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, do { u64 new_spte; - for_each_shadow_entry_lockless(vcpu, gva, iterator, spte) + for_each_shadow_entry_lockless(vcpu, cr2_or_gpa, iterator, spte) if (!is_shadow_present_pte(spte) || iterator.level < level) break; @@ -3626,7 +3630,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, } while (true); - trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep, + trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep, spte, fault_handled); walk_shadow_page_lockless_end(vcpu); @@ -3634,10 +3638,11 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, } static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, - gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); + gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, + bool *writable); static int make_mmu_pages_available(struct kvm_vcpu *vcpu); -static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, +static int nonpaging_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, gfn_t gfn, bool prefault) { int r; @@ -3663,16 +3668,16 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); } - if (fast_page_fault(vcpu, v, level, error_code)) + if (fast_page_fault(vcpu, gpa, level, error_code)) return RET_PF_RETRY; mmu_seq = vcpu->kvm->mmu_notifier_seq; smp_rmb(); - if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable)) + if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable)) return RET_PF_RETRY; - if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) + if (handle_abnormal_pfn(vcpu, gpa, gfn, pfn, ACC_ALL, &r)) return r; r = RET_PF_RETRY; @@ -3683,7 +3688,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); - r = __direct_map(vcpu, v, write, map_writable, level, pfn, + r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault, false); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); @@ -3981,7 +3986,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots); -static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, +static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gpa_t vaddr, u32 access, struct x86_exception *exception) { if (exception) @@ -3989,7 +3994,7 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, return vaddr; } -static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, +static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gpa_t vaddr, u32 access, struct x86_exception *exception) { @@ -4149,13 +4154,14 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) walk_shadow_page_lockless_end(vcpu); } -static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, +static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, bool prefault) { - gfn_t gfn = gva >> PAGE_SHIFT; + gfn_t gfn = gpa >> PAGE_SHIFT; int r; - pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); + /* Note, paging is disabled, ergo gva == gpa. */ + pgprintk("%s: gva %lx error %x\n", __func__, gpa, error_code); if (page_fault_handle_page_track(vcpu, error_code, gfn)) return RET_PF_EMULATE; @@ -4167,11 +4173,12 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); - return nonpaging_map(vcpu, gva & PAGE_MASK, + return nonpaging_map(vcpu, gpa & PAGE_MASK, error_code, gfn, prefault); } -static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) +static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + gfn_t gfn) { struct kvm_arch_async_pf arch; @@ -4180,11 +4187,13 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) arch.direct_map = vcpu->arch.mmu->direct_map; arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu); - return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); + return kvm_setup_async_pf(vcpu, cr2_or_gpa, + kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); } static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, - gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable) + gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, + bool *writable) { struct kvm_memory_slot *slot; bool async; @@ -4204,12 +4213,12 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, return false; /* *pfn has correct page already */ if (!prefault && kvm_can_do_async_pf(vcpu)) { - trace_kvm_try_async_get_page(gva, gfn); + trace_kvm_try_async_get_page(cr2_or_gpa, gfn); if (kvm_find_async_pf_gfn(vcpu, gfn)) { - trace_kvm_async_pf_doublefault(gva, gfn); + trace_kvm_async_pf_doublefault(cr2_or_gpa, gfn); kvm_make_request(KVM_REQ_APF_HALT, vcpu); return true; - } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn)) + } else if (kvm_arch_setup_async_pf(vcpu, cr2_or_gpa, gfn)) return true; } @@ -4222,6 +4231,12 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, { int r = 1; +#ifndef CONFIG_X86_64 + /* A 64-bit CR2 should be impossible on 32-bit KVM. */ + if (WARN_ON_ONCE(fault_address >> 32)) + return -EFAULT; +#endif + vcpu->arch.l1tf_flush_l1d = true; switch (vcpu->arch.apf.host_apf_reason) { default: @@ -4259,7 +4274,7 @@ check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level) return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num); } -static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, +static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, bool prefault) { kvm_pfn_t pfn; @@ -5516,7 +5531,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu) return 0; } -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len) { int r, emulation_type = 0; @@ -5525,18 +5540,18 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, /* With shadow page tables, fault_address contains a GVA or nGPA. */ if (vcpu->arch.mmu->direct_map) { vcpu->arch.gpa_available = true; - vcpu->arch.gpa_val = cr2; + vcpu->arch.gpa_val = cr2_or_gpa; } r = RET_PF_INVALID; if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, cr2, direct); + r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct); if (r == RET_PF_EMULATE) goto emulate; } if (r == RET_PF_INVALID) { - r = vcpu->arch.mmu->page_fault(vcpu, cr2, + r = vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa, lower_32_bits(error_code), false); WARN_ON(r == RET_PF_INVALID); @@ -5556,7 +5571,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, */ if (vcpu->arch.mmu->direct_map && (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) { - kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2)); + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa)); return 1; } @@ -5571,7 +5586,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, * explicitly shadowing L1's page tables, i.e. unprotecting something * for L1 isn't going to magically fix whatever issue cause L2 to fail. */ - if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu)) + if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu)) emulation_type = EMULTYPE_ALLOW_RETRY; emulate: /* @@ -5586,7 +5601,7 @@ emulate: return 1; } - return x86_emulate_instruction(vcpu, cr2, emulation_type, insn, + return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn, insn_len); } EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); @@ -6249,7 +6264,7 @@ static void kvm_set_mmio_spte_mask(void) * If reserved bit is not supported, clear the present bit to disable * mmio page fault. */ - if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52) + if (shadow_phys_bits == 52) mask &= ~1ull; kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK); diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 7ca8831c7d1a..3c6522b84ff1 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -249,13 +249,13 @@ TRACE_EVENT( TRACE_EVENT( fast_page_fault, - TP_PROTO(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, + TP_PROTO(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code, u64 *sptep, u64 old_spte, bool retry), - TP_ARGS(vcpu, gva, error_code, sptep, old_spte, retry), + TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, retry), TP_STRUCT__entry( __field(int, vcpu_id) - __field(gva_t, gva) + __field(gpa_t, cr2_or_gpa) __field(u32, error_code) __field(u64 *, sptep) __field(u64, old_spte) @@ -265,7 +265,7 @@ TRACE_EVENT( TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; - __entry->gva = gva; + __entry->cr2_or_gpa = cr2_or_gpa; __entry->error_code = error_code; __entry->sptep = sptep; __entry->old_spte = old_spte; @@ -273,9 +273,9 @@ TRACE_EVENT( __entry->retry = retry; ), - TP_printk("vcpu %d gva %lx error_code %s sptep %p old %#llx" + TP_printk("vcpu %d gva %llx error_code %s sptep %p old %#llx" " new %llx spurious %d fixed %d", __entry->vcpu_id, - __entry->gva, __print_flags(__entry->error_code, "|", + __entry->cr2_or_gpa, __print_flags(__entry->error_code, "|", kvm_mmu_trace_pferr_flags), __entry->sptep, __entry->old_spte, __entry->new_spte, __spte_satisfied(old_spte), __spte_satisfied(new_spte) diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index 25ce3edd1872..7f0059aa30e1 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -192,11 +192,15 @@ static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit) break; case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000: *seg = 1; - *unit = msr - MSR_MTRRfix16K_80000; + *unit = array_index_nospec( + msr - MSR_MTRRfix16K_80000, + MSR_MTRRfix16K_A0000 - MSR_MTRRfix16K_80000 + 1); break; case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000: *seg = 2; - *unit = msr - MSR_MTRRfix4K_C0000; + *unit = array_index_nospec( + msr - MSR_MTRRfix4K_C0000, + MSR_MTRRfix4K_F8000 - MSR_MTRRfix4K_C0000 + 1); break; default: return false; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 97b21e7fd013..c1d7b866a03f 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -291,11 +291,11 @@ static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte) } /* - * Fetch a guest pte for a guest virtual address + * Fetch a guest pte for a guest virtual address, or for an L2's GPA. */ static int FNAME(walk_addr_generic)(struct guest_walker *walker, struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gva_t addr, u32 access) + gpa_t addr, u32 access) { int ret; pt_element_t pte; @@ -496,7 +496,7 @@ error: } static int FNAME(walk_addr)(struct guest_walker *walker, - struct kvm_vcpu *vcpu, gva_t addr, u32 access) + struct kvm_vcpu *vcpu, gpa_t addr, u32 access) { return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr, access); @@ -611,7 +611,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, * If the guest tries to write a write-protected page, we need to * emulate this operation, return 1 to indicate this case. */ -static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, +static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr, struct guest_walker *gw, int write_fault, int hlevel, kvm_pfn_t pfn, bool map_writable, bool prefault, @@ -765,7 +765,7 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu, * Returns: 1 if we need to emulate the instruction, 0 otherwise, or * a negative value on error. */ -static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, +static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code, bool prefault) { int write_fault = error_code & PFERR_WRITE_MASK; @@ -945,18 +945,19 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa) spin_unlock(&vcpu->kvm->mmu_lock); } -static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, +/* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */ +static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t addr, u32 access, struct x86_exception *exception) { struct guest_walker walker; gpa_t gpa = UNMAPPED_GVA; int r; - r = FNAME(walk_addr)(&walker, vcpu, vaddr, access); + r = FNAME(walk_addr)(&walker, vcpu, addr, access); if (r) { gpa = gfn_to_gpa(walker.gfn); - gpa |= vaddr & ~PAGE_MASK; + gpa |= addr & ~PAGE_MASK; } else if (exception) *exception = walker.fault; @@ -964,7 +965,8 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, } #if PTTYPE != PTTYPE_EPT -static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, +/* Note, gva_to_gpa_nested() is only used to translate L2 GVAs. */ +static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr, u32 access, struct x86_exception *exception) { @@ -972,6 +974,11 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, gpa_t gpa = UNMAPPED_GVA; int r; +#ifndef CONFIG_X86_64 + /* A 64-bit GVA should be impossible on 32-bit KVM. */ + WARN_ON_ONCE(vaddr >> 32); +#endif + r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access); if (r) { diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 58265f761c3b..3fc98afd72a8 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -2,6 +2,8 @@ #ifndef __KVM_X86_PMU_H #define __KVM_X86_PMU_H +#include + #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu) #define pmu_to_vcpu(pmu) (container_of((pmu), struct kvm_vcpu, arch.pmu)) #define pmc_to_pmu(pmc) (&(pmc)->vcpu->arch.pmu) @@ -86,8 +88,12 @@ static inline bool pmc_is_enabled(struct kvm_pmc *pmc) static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr, u32 base) { - if (msr >= base && msr < base + pmu->nr_arch_gp_counters) - return &pmu->gp_counters[msr - base]; + if (msr >= base && msr < base + pmu->nr_arch_gp_counters) { + u32 index = array_index_nospec(msr - base, + pmu->nr_arch_gp_counters); + + return &pmu->gp_counters[index]; + } return NULL; } @@ -97,8 +103,12 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr) { int base = MSR_CORE_PERF_FIXED_CTR0; - if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) - return &pmu->fixed_counters[msr - base]; + if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) { + u32 index = array_index_nospec(msr - base, + pmu->nr_arch_fixed_counters); + + return &pmu->fixed_counters[index]; + } return NULL; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c5673bda4b66..8d1be7c61f10 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5986,6 +5986,11 @@ static bool svm_has_wbinvd_exit(void) return true; } +static bool svm_pku_supported(void) +{ + return false; +} + #define PRE_EX(exit) { .exit_code = (exit), \ .stage = X86_ICPT_PRE_EXCEPT, } #define POST_EX(exit) { .exit_code = (exit), \ @@ -7278,6 +7283,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .xsaves_supported = svm_xsaves_supported, .umip_emulated = svm_umip_emulated, .pt_supported = svm_pt_supported, + .pku_supported = svm_pku_supported, .set_supported_cpuid = svm_set_supported_cpuid, diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 7aa69716d516..283bdb7071af 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -145,6 +145,11 @@ static inline bool vmx_umip_emulated(void) SECONDARY_EXEC_DESC; } +static inline bool vmx_pku_supported(void) +{ + return boot_cpu_has(X86_FEATURE_PKU); +} + static inline bool cpu_has_vmx_rdtscp(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index d0523741fb03..931d3b5f3acd 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4663,8 +4663,10 @@ static int handle_vmread(struct kvm_vcpu *vcpu) vmx_instruction_info, true, len, &gva)) return 1; /* _system ok, nested_vmx_check_permission has verified cpl=0 */ - if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e)) + if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e)) { kvm_inject_page_fault(vcpu, &e); + return 1; + } } return nested_vmx_succeed(vcpu); diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 3e9c059099e9..f8998a7bc7d5 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -84,10 +84,14 @@ static unsigned intel_find_arch_event(struct kvm_pmu *pmu, static unsigned intel_find_fixed_event(int idx) { - if (idx >= ARRAY_SIZE(fixed_pmc_events)) + u32 event; + size_t size = ARRAY_SIZE(fixed_pmc_events); + + if (idx >= size) return PERF_COUNT_HW_MAX; - return intel_arch_events[fixed_pmc_events[idx]].event_type; + event = fixed_pmc_events[array_index_nospec(idx, size)]; + return intel_arch_events[event].event_type; } /* check if a PMC is enabled by comparing it with globl_ctrl bits. */ @@ -128,16 +132,20 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); bool fixed = idx & (1u << 30); struct kvm_pmc *counters; + unsigned int num_counters; idx &= ~(3u << 30); - if (!fixed && idx >= pmu->nr_arch_gp_counters) - return NULL; - if (fixed && idx >= pmu->nr_arch_fixed_counters) + if (fixed) { + counters = pmu->fixed_counters; + num_counters = pmu->nr_arch_fixed_counters; + } else { + counters = pmu->gp_counters; + num_counters = pmu->nr_arch_gp_counters; + } + if (idx >= num_counters) return NULL; - counters = fixed ? pmu->fixed_counters : pmu->gp_counters; *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP]; - - return &counters[idx]; + return &counters[array_index_nospec(idx, num_counters)]; } static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f09a213fd5cb..dc7c166c4335 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2140,6 +2140,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_num_address_ranges))) return 1; + if (is_noncanonical_address(data, vcpu)) + return 1; if (index % 2) vmx->pt_desc.guest.addr_b[index / 2] = data; else @@ -7865,6 +7867,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .xsaves_supported = vmx_xsaves_supported, .umip_emulated = vmx_umip_emulated, .pt_supported = vmx_pt_supported, + .pku_supported = vmx_pku_supported, .request_immediate_exit = vmx_request_immediate_exit, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8d82ec0482fc..edde5ee8c6f5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -92,6 +92,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #endif +static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS; + #define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ #define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ @@ -886,9 +888,38 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) } EXPORT_SYMBOL_GPL(kvm_set_xcr); +static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c) +{ + u64 reserved_bits = CR4_RESERVED_BITS; + + if (!cpu_has(c, X86_FEATURE_XSAVE)) + reserved_bits |= X86_CR4_OSXSAVE; + + if (!cpu_has(c, X86_FEATURE_SMEP)) + reserved_bits |= X86_CR4_SMEP; + + if (!cpu_has(c, X86_FEATURE_SMAP)) + reserved_bits |= X86_CR4_SMAP; + + if (!cpu_has(c, X86_FEATURE_FSGSBASE)) + reserved_bits |= X86_CR4_FSGSBASE; + + if (!cpu_has(c, X86_FEATURE_PKU)) + reserved_bits |= X86_CR4_PKE; + + if (!cpu_has(c, X86_FEATURE_LA57) && + !(cpuid_ecx(0x7) & bit(X86_FEATURE_LA57))) + reserved_bits |= X86_CR4_LA57; + + if (!cpu_has(c, X86_FEATURE_UMIP) && !kvm_x86_ops->umip_emulated()) + reserved_bits |= X86_CR4_UMIP; + + return reserved_bits; +} + static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - if (cr4 & CR4_RESERVED_BITS) + if (cr4 & cr4_reserved_bits) return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) @@ -1054,9 +1085,11 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) { + size_t size = ARRAY_SIZE(vcpu->arch.db); + switch (dr) { case 0 ... 3: - vcpu->arch.db[dr] = val; + vcpu->arch.db[array_index_nospec(dr, size)] = val; if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) vcpu->arch.eff_db[dr] = val; break; @@ -1093,9 +1126,11 @@ EXPORT_SYMBOL_GPL(kvm_set_dr); int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) { + size_t size = ARRAY_SIZE(vcpu->arch.db); + switch (dr) { case 0 ... 3: - *val = vcpu->arch.db[dr]; + *val = vcpu->arch.db[array_index_nospec(dr, size)]; break; case 4: /* fall through */ @@ -2490,7 +2525,10 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info) default: if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MCx_CTL(bank_num)) { - u32 offset = msr - MSR_IA32_MC0_CTL; + u32 offset = array_index_nospec( + msr - MSR_IA32_MC0_CTL, + MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); + /* only 0 or all 1s can be written to IA32_MCi_CTL * some Linux kernels though clear bit 10 in bank 4 to * workaround a BIOS/GART TBL issue on AMD K8s, ignore @@ -2586,45 +2624,47 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) static void record_steal_time(struct kvm_vcpu *vcpu) { + struct kvm_host_map map; + struct kvm_steal_time *st; + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; - if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) + /* -EAGAIN is returned in atomic context so we can just return. */ + if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, + &map, &vcpu->arch.st.cache, false)) return; + st = map.hva + + offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); + /* * Doing a TLB flush here, on the guest's behalf, can avoid * expensive IPIs. */ trace_kvm_pv_tlb_flush(vcpu->vcpu_id, - vcpu->arch.st.steal.preempted & KVM_VCPU_FLUSH_TLB); - if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB) + st->preempted & KVM_VCPU_FLUSH_TLB); + if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB) kvm_vcpu_flush_tlb(vcpu, false); - if (vcpu->arch.st.steal.version & 1) - vcpu->arch.st.steal.version += 1; /* first time write, random junk */ + vcpu->arch.st.preempted = 0; - vcpu->arch.st.steal.version += 1; + if (st->version & 1) + st->version += 1; /* first time write, random junk */ - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); + st->version += 1; smp_wmb(); - vcpu->arch.st.steal.steal += current->sched_info.run_delay - + st->steal += current->sched_info.run_delay - vcpu->arch.st.last_steal; vcpu->arch.st.last_steal = current->sched_info.run_delay; - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); - smp_wmb(); - vcpu->arch.st.steal.version += 1; + st->version += 1; - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); + kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false); } int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) @@ -2777,11 +2817,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data & KVM_STEAL_RESERVED_MASK) return 1; - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime, - data & KVM_STEAL_VALID_BITS, - sizeof(struct kvm_steal_time))) - return 1; - vcpu->arch.st.msr_val = data; if (!(data & KVM_MSR_ENABLED)) @@ -2917,7 +2952,10 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) default: if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MCx_CTL(bank_num)) { - u32 offset = msr - MSR_IA32_MC0_CTL; + u32 offset = array_index_nospec( + msr - MSR_IA32_MC0_CTL, + MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); + data = vcpu->arch.mce_banks[offset]; break; } @@ -3443,10 +3481,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_x86_ops->vcpu_load(vcpu, cpu); - fpregs_assert_state_consistent(); - if (test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_return(); - /* Apply any externally detected TSC adjustments (due to suspend) */ if (unlikely(vcpu->arch.tsc_offset_adjustment)) { adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); @@ -3486,15 +3520,25 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) { + struct kvm_host_map map; + struct kvm_steal_time *st; + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; - vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED; + if (vcpu->arch.st.preempted) + return; + + if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map, + &vcpu->arch.st.cache, true)) + return; - kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal.preempted, - offsetof(struct kvm_steal_time, preempted), - sizeof(vcpu->arch.st.steal.preempted)); + st = map.hva + + offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); + + st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED; + + kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -6365,11 +6409,11 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) return 1; } -static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, +static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, bool write_fault_to_shadow_pgtable, int emulation_type) { - gpa_t gpa = cr2; + gpa_t gpa = cr2_or_gpa; kvm_pfn_t pfn; if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) @@ -6383,7 +6427,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, * Write permission should be allowed since only * write access need to be emulated. */ - gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); + gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); /* * If the mapping is invalid in guest, let cpu retry @@ -6440,10 +6484,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, } static bool retry_instruction(struct x86_emulate_ctxt *ctxt, - unsigned long cr2, int emulation_type) + gpa_t cr2_or_gpa, int emulation_type) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - unsigned long last_retry_eip, last_retry_addr, gpa = cr2; + unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa; last_retry_eip = vcpu->arch.last_retry_eip; last_retry_addr = vcpu->arch.last_retry_addr; @@ -6472,14 +6516,14 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, if (x86_page_table_writing_insn(ctxt)) return false; - if (ctxt->eip == last_retry_eip && last_retry_addr == cr2) + if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa) return false; vcpu->arch.last_retry_eip = ctxt->eip; - vcpu->arch.last_retry_addr = cr2; + vcpu->arch.last_retry_addr = cr2_or_gpa; if (!vcpu->arch.mmu->direct_map) - gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); + gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); @@ -6625,11 +6669,8 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt) return false; } -int x86_emulate_instruction(struct kvm_vcpu *vcpu, - unsigned long cr2, - int emulation_type, - void *insn, - int insn_len) +int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + int emulation_type, void *insn, int insn_len) { int r; struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; @@ -6675,8 +6716,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, kvm_queue_exception(vcpu, UD_VECTOR); return 1; } - if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, - emulation_type)) + if (reexecute_instruction(vcpu, cr2_or_gpa, + write_fault_to_spt, + emulation_type)) return 1; if (ctxt->have_exception) { /* @@ -6710,7 +6752,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, return 1; } - if (retry_instruction(ctxt, cr2, emulation_type)) + if (retry_instruction(ctxt, cr2_or_gpa, emulation_type)) return 1; /* this is needed for vmware backdoor interface to work since it @@ -6722,7 +6764,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, restart: /* Save the faulting GPA (cr2) in the address field */ - ctxt->exception.address = cr2; + ctxt->exception.address = cr2_or_gpa; r = x86_emulate_insn(ctxt); @@ -6730,7 +6772,7 @@ restart: return 1; if (r == EMULATION_FAILED) { - if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, + if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt, emulation_type)) return 1; @@ -8172,8 +8214,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) trace_kvm_entry(vcpu->vcpu_id); guest_enter_irqoff(); - /* The preempt notifier should have taken care of the FPU already. */ - WARN_ON_ONCE(test_thread_flag(TIF_NEED_FPU_LOAD)); + fpregs_assert_state_consistent(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_return(); if (unlikely(vcpu->arch.switch_db_regs)) { set_debugreg(0, 7); @@ -8445,12 +8488,26 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) return 0; } +static void kvm_save_current_fpu(struct fpu *fpu) +{ + /* + * If the target FPU state is not resident in the CPU registers, just + * memcpy() from current, else save CPU state directly to the target. + */ + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + memcpy(&fpu->state, ¤t->thread.fpu.state, + fpu_kernel_xstate_size); + else + copy_fpregs_to_fpstate(fpu); +} + /* Swap (qemu) user FPU context for the guest FPU context. */ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { fpregs_lock(); - copy_fpregs_to_fpstate(vcpu->arch.user_fpu); + kvm_save_current_fpu(vcpu->arch.user_fpu); + /* PKRU is separately restored in kvm_x86_ops->run. */ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, ~XFEATURE_MASK_PKRU); @@ -8466,7 +8523,8 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { fpregs_lock(); - copy_fpregs_to_fpstate(vcpu->arch.guest_fpu); + kvm_save_current_fpu(vcpu->arch.guest_fpu); + copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state); fpregs_mark_activate(); @@ -8688,6 +8746,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { vcpu_load(vcpu); + if (kvm_mpx_supported()) + kvm_load_guest_fpu(vcpu); kvm_apic_accept_events(vcpu); if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && @@ -8696,6 +8756,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, else mp_state->mp_state = vcpu->arch.mp_state; + if (kvm_mpx_supported()) + kvm_put_guest_fpu(vcpu); vcpu_put(vcpu); return 0; } @@ -9055,6 +9117,9 @@ static void fx_init(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask; + struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache; + + kvm_release_pfn(cache->pfn, cache->dirty, cache); kvmclock_reset(vcpu); @@ -9125,7 +9190,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_mmu_unload(vcpu); vcpu_put(vcpu); - kvm_x86_ops->vcpu_free(vcpu); + kvm_arch_vcpu_free(vcpu); } void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) @@ -9317,6 +9382,8 @@ int kvm_arch_hardware_setup(void) if (r != 0) return r; + cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data); + if (kvm_has_tsc_control) { /* * Make sure the user can only configure tsc_khz values that @@ -9719,11 +9786,18 @@ out_free: void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) { + struct kvm_vcpu *vcpu; + int i; + /* * memslots->generation has been incremented. * mmio generation may have reached its maximum value. */ kvm_mmu_invalidate_mmio_sptes(kvm, gen); + + /* Force re-initialization of steal_time cache */ + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vcpu_kick(vcpu); } int kvm_arch_prepare_memory_region(struct kvm *kvm, @@ -9975,7 +10049,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu)) return; - vcpu->arch.mmu->page_fault(vcpu, work->gva, 0, true); + vcpu->arch.mmu->page_fault(vcpu, work->cr2_or_gpa, 0, true); } static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) @@ -10088,7 +10162,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, { struct x86_exception fault; - trace_kvm_async_pf_not_present(work->arch.token, work->gva); + trace_kvm_async_pf_not_present(work->arch.token, work->cr2_or_gpa); kvm_add_async_pf_gfn(vcpu, work->arch.gfn); if (kvm_can_deliver_async_pf(vcpu) && @@ -10123,7 +10197,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, work->arch.token = ~0; /* broadcast wakeup */ else kvm_del_async_pf_gfn(vcpu, work->arch.gfn); - trace_kvm_async_pf_ready(work->arch.token, work->gva); + trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa); if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED && !apf_get_user(vcpu, &val)) { diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index dbf7442a822b..de6b55484876 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -286,7 +286,7 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int page_num); bool kvm_vector_hashing_enabled(void); -int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, +int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type, void *insn, int insn_len); #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5bfea374a160..6ea215cdeada 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1215,6 +1215,7 @@ asmlinkage __visible void __init xen_start_kernel(void) x86_platform.get_nmi_reason = xen_get_nmi_reason; x86_init.resources.memory_setup = xen_memory_setup; + x86_init.irqs.intr_mode_select = x86_init_noop; x86_init.irqs.intr_mode_init = x86_init_noop; x86_init.oem.arch_setup = xen_arch_setup; x86_init.oem.banner = xen_banner; diff --git a/crypto/algapi.c b/crypto/algapi.c index de30ddc952d8..bb8329e49956 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -257,6 +257,7 @@ void crypto_alg_tested(const char *name, int err) struct crypto_alg *alg; struct crypto_alg *q; LIST_HEAD(list); + bool best; down_write(&crypto_alg_sem); list_for_each_entry(q, &crypto_alg_list, cra_list) { @@ -280,6 +281,21 @@ found: alg->cra_flags |= CRYPTO_ALG_TESTED; + /* Only satisfy larval waiters if we are the best. */ + best = true; + list_for_each_entry(q, &crypto_alg_list, cra_list) { + if (crypto_is_moribund(q) || !crypto_is_larval(q)) + continue; + + if (strcmp(alg->cra_name, q->cra_name)) + continue; + + if (q->cra_priority > alg->cra_priority) { + best = false; + break; + } + } + list_for_each_entry(q, &crypto_alg_list, cra_list) { if (q == alg) continue; @@ -303,10 +319,12 @@ found: continue; if ((q->cra_flags ^ alg->cra_flags) & larval->mask) continue; - if (!crypto_mod_get(alg)) - continue; - larval->adult = alg; + if (best && crypto_mod_get(alg)) + larval->adult = alg; + else + larval->adult = ERR_PTR(-EAGAIN); + continue; } @@ -669,11 +687,9 @@ EXPORT_SYMBOL_GPL(crypto_grab_spawn); void crypto_drop_spawn(struct crypto_spawn *spawn) { - if (!spawn->alg) - return; - down_write(&crypto_alg_sem); - list_del(&spawn->list); + if (spawn->alg) + list_del(&spawn->list); up_write(&crypto_alg_sem); } EXPORT_SYMBOL_GPL(crypto_drop_spawn); @@ -681,22 +697,16 @@ EXPORT_SYMBOL_GPL(crypto_drop_spawn); static struct crypto_alg *crypto_spawn_alg(struct crypto_spawn *spawn) { struct crypto_alg *alg; - struct crypto_alg *alg2; down_read(&crypto_alg_sem); alg = spawn->alg; - alg2 = alg; - if (alg2) - alg2 = crypto_mod_get(alg2); - up_read(&crypto_alg_sem); - - if (!alg2) { - if (alg) - crypto_shoot_alg(alg); - return ERR_PTR(-EAGAIN); + if (alg && !crypto_mod_get(alg)) { + alg->cra_flags |= CRYPTO_ALG_DYING; + alg = NULL; } + up_read(&crypto_alg_sem); - return alg; + return alg ?: ERR_PTR(-EAGAIN); } struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type, diff --git a/crypto/api.c b/crypto/api.c index d8ba54142620..eda0c56b8615 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -97,7 +97,7 @@ static void crypto_larval_destroy(struct crypto_alg *alg) struct crypto_larval *larval = (void *)alg; BUG_ON(!crypto_is_larval(alg)); - if (larval->adult) + if (!IS_ERR_OR_NULL(larval->adult)) crypto_mod_put(larval->adult); kfree(larval); } @@ -178,6 +178,8 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg) alg = ERR_PTR(-ETIMEDOUT); else if (!alg) alg = ERR_PTR(-ENOENT); + else if (IS_ERR(alg)) + ; else if (crypto_is_test_larval(larval) && !(alg->cra_flags & CRYPTO_ALG_TESTED)) alg = ERR_PTR(-EAGAIN); @@ -344,13 +346,12 @@ static unsigned int crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask) return len; } -void crypto_shoot_alg(struct crypto_alg *alg) +static void crypto_shoot_alg(struct crypto_alg *alg) { down_write(&crypto_alg_sem); alg->cra_flags |= CRYPTO_ALG_DYING; up_write(&crypto_alg_sem); } -EXPORT_SYMBOL_GPL(crypto_shoot_alg); struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, u32 mask) diff --git a/crypto/internal.h b/crypto/internal.h index 93df7bec844a..e506a57e2243 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -68,7 +68,6 @@ void crypto_alg_tested(const char *name, int err); void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list, struct crypto_alg *nalg); void crypto_remove_final(struct list_head *list); -void crypto_shoot_alg(struct crypto_alg *alg); struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, u32 mask); void *crypto_create_tfm(struct crypto_alg *alg, diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index 81bbea7f2ba6..a4f3b3f342c8 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -24,6 +24,8 @@ static struct kset *pcrypt_kset; struct pcrypt_instance_ctx { struct crypto_aead_spawn spawn; + struct padata_shell *psenc; + struct padata_shell *psdec; atomic_t tfm_count; }; @@ -32,6 +34,12 @@ struct pcrypt_aead_ctx { unsigned int cb_cpu; }; +static inline struct pcrypt_instance_ctx *pcrypt_tfm_ictx( + struct crypto_aead *tfm) +{ + return aead_instance_ctx(aead_alg_instance(tfm)); +} + static int pcrypt_aead_setkey(struct crypto_aead *parent, const u8 *key, unsigned int keylen) { @@ -63,7 +71,6 @@ static void pcrypt_aead_done(struct crypto_async_request *areq, int err) struct padata_priv *padata = pcrypt_request_padata(preq); padata->info = err; - req->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; padata_do_serial(padata); } @@ -90,6 +97,9 @@ static int pcrypt_aead_encrypt(struct aead_request *req) struct crypto_aead *aead = crypto_aead_reqtfm(req); struct pcrypt_aead_ctx *ctx = crypto_aead_ctx(aead); u32 flags = aead_request_flags(req); + struct pcrypt_instance_ctx *ictx; + + ictx = pcrypt_tfm_ictx(aead); memset(padata, 0, sizeof(struct padata_priv)); @@ -103,7 +113,7 @@ static int pcrypt_aead_encrypt(struct aead_request *req) req->cryptlen, req->iv); aead_request_set_ad(creq, req->assoclen); - err = padata_do_parallel(pencrypt, padata, &ctx->cb_cpu); + err = padata_do_parallel(ictx->psenc, padata, &ctx->cb_cpu); if (!err) return -EINPROGRESS; @@ -132,6 +142,9 @@ static int pcrypt_aead_decrypt(struct aead_request *req) struct crypto_aead *aead = crypto_aead_reqtfm(req); struct pcrypt_aead_ctx *ctx = crypto_aead_ctx(aead); u32 flags = aead_request_flags(req); + struct pcrypt_instance_ctx *ictx; + + ictx = pcrypt_tfm_ictx(aead); memset(padata, 0, sizeof(struct padata_priv)); @@ -145,7 +158,7 @@ static int pcrypt_aead_decrypt(struct aead_request *req) req->cryptlen, req->iv); aead_request_set_ad(creq, req->assoclen); - err = padata_do_parallel(pdecrypt, padata, &ctx->cb_cpu); + err = padata_do_parallel(ictx->psdec, padata, &ctx->cb_cpu); if (!err) return -EINPROGRESS; @@ -192,6 +205,8 @@ static void pcrypt_free(struct aead_instance *inst) struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst); crypto_drop_aead(&ctx->spawn); + padata_free_shell(ctx->psdec); + padata_free_shell(ctx->psenc); kfree(inst); } @@ -233,12 +248,22 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb, if (!inst) return -ENOMEM; + err = -ENOMEM; + ctx = aead_instance_ctx(inst); + ctx->psenc = padata_alloc_shell(pencrypt); + if (!ctx->psenc) + goto out_free_inst; + + ctx->psdec = padata_alloc_shell(pdecrypt); + if (!ctx->psdec) + goto out_free_psenc; + crypto_set_aead_spawn(&ctx->spawn, aead_crypto_instance(inst)); err = crypto_grab_aead(&ctx->spawn, name, 0, 0); if (err) - goto out_free_inst; + goto out_free_psdec; alg = crypto_spawn_aead_alg(&ctx->spawn); err = pcrypt_init_instance(aead_crypto_instance(inst), &alg->base); @@ -271,6 +296,10 @@ out: out_drop_aead: crypto_drop_aead(&ctx->spawn); +out_free_psdec: + padata_free_shell(ctx->psdec); +out_free_psenc: + padata_free_shell(ctx->psenc); out_free_inst: kfree(inst); goto out; diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 558fedf8a7a1..254a7d98b9d4 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -38,6 +38,8 @@ #define PREFIX "ACPI: " #define ACPI_BATTERY_VALUE_UNKNOWN 0xFFFFFFFF +#define ACPI_BATTERY_CAPACITY_VALID(capacity) \ + ((capacity) != 0 && (capacity) != ACPI_BATTERY_VALUE_UNKNOWN) #define ACPI_BATTERY_DEVICE_NAME "Battery" @@ -192,7 +194,8 @@ static int acpi_battery_is_charged(struct acpi_battery *battery) static bool acpi_battery_is_degraded(struct acpi_battery *battery) { - return battery->full_charge_capacity && battery->design_capacity && + return ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity) && + ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity) && battery->full_charge_capacity < battery->design_capacity; } @@ -214,7 +217,7 @@ static int acpi_battery_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val) { - int ret = 0; + int full_capacity = ACPI_BATTERY_VALUE_UNKNOWN, ret = 0; struct acpi_battery *battery = to_acpi_battery(psy); if (acpi_battery_present(battery)) { @@ -263,14 +266,14 @@ static int acpi_battery_get_property(struct power_supply *psy, break; case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN: case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN: - if (battery->design_capacity == ACPI_BATTERY_VALUE_UNKNOWN) + if (!ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity)) ret = -ENODEV; else val->intval = battery->design_capacity * 1000; break; case POWER_SUPPLY_PROP_CHARGE_FULL: case POWER_SUPPLY_PROP_ENERGY_FULL: - if (battery->full_charge_capacity == ACPI_BATTERY_VALUE_UNKNOWN) + if (!ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity)) ret = -ENODEV; else val->intval = battery->full_charge_capacity * 1000; @@ -283,11 +286,17 @@ static int acpi_battery_get_property(struct power_supply *psy, val->intval = battery->capacity_now * 1000; break; case POWER_SUPPLY_PROP_CAPACITY: - if (battery->capacity_now && battery->full_charge_capacity) - val->intval = battery->capacity_now * 100/ - battery->full_charge_capacity; + if (ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity)) + full_capacity = battery->full_charge_capacity; + else if (ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity)) + full_capacity = battery->design_capacity; + + if (battery->capacity_now == ACPI_BATTERY_VALUE_UNKNOWN || + full_capacity == ACPI_BATTERY_VALUE_UNKNOWN) + ret = -ENODEV; else - val->intval = 0; + val->intval = battery->capacity_now * 100/ + full_capacity; break; case POWER_SUPPLY_PROP_CAPACITY_LEVEL: if (battery->state & ACPI_BATTERY_STATE_CRITICAL) @@ -333,6 +342,20 @@ static enum power_supply_property charge_battery_props[] = { POWER_SUPPLY_PROP_SERIAL_NUMBER, }; +static enum power_supply_property charge_battery_full_cap_broken_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_PRESENT, + POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_CYCLE_COUNT, + POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CURRENT_NOW, + POWER_SUPPLY_PROP_CHARGE_NOW, + POWER_SUPPLY_PROP_MODEL_NAME, + POWER_SUPPLY_PROP_MANUFACTURER, + POWER_SUPPLY_PROP_SERIAL_NUMBER, +}; + static enum power_supply_property energy_battery_props[] = { POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_PRESENT, @@ -794,20 +817,34 @@ static void __exit battery_hook_exit(void) static int sysfs_add_battery(struct acpi_battery *battery) { struct power_supply_config psy_cfg = { .drv_data = battery, }; + bool full_cap_broken = false; + + if (!ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity) && + !ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity)) + full_cap_broken = true; if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) { - battery->bat_desc.properties = charge_battery_props; - battery->bat_desc.num_properties = - ARRAY_SIZE(charge_battery_props); - } else if (battery->full_charge_capacity == 0) { - battery->bat_desc.properties = - energy_battery_full_cap_broken_props; - battery->bat_desc.num_properties = - ARRAY_SIZE(energy_battery_full_cap_broken_props); + if (full_cap_broken) { + battery->bat_desc.properties = + charge_battery_full_cap_broken_props; + battery->bat_desc.num_properties = + ARRAY_SIZE(charge_battery_full_cap_broken_props); + } else { + battery->bat_desc.properties = charge_battery_props; + battery->bat_desc.num_properties = + ARRAY_SIZE(charge_battery_props); + } } else { - battery->bat_desc.properties = energy_battery_props; - battery->bat_desc.num_properties = - ARRAY_SIZE(energy_battery_props); + if (full_cap_broken) { + battery->bat_desc.properties = + energy_battery_full_cap_broken_props; + battery->bat_desc.num_properties = + ARRAY_SIZE(energy_battery_full_cap_broken_props); + } else { + battery->bat_desc.properties = energy_battery_props; + battery->bat_desc.num_properties = + ARRAY_SIZE(energy_battery_props); + } } battery->bat_desc.name = acpi_device_bid(battery->device); diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 31014c7d3793..e63fd7bfd3a5 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -336,6 +336,11 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Precision 7510"), }, }, + + /* + * Desktops which falsely report a backlight and which our heuristics + * for this do not catch. + */ { .callback = video_detect_force_none, .ident = "Dell OptiPlex 9020M", @@ -344,6 +349,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 9020M"), }, }, + { + .callback = video_detect_force_none, + .ident = "MSI MS-7721", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MSI"), + DMI_MATCH(DMI_PRODUCT_NAME, "MS-7721"), + }, + }, { }, }; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 134a8af51511..0e99a760aebd 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -273,10 +273,38 @@ static void dpm_wait_for_suppliers(struct device *dev, bool async) device_links_read_unlock(idx); } -static void dpm_wait_for_superior(struct device *dev, bool async) +static bool dpm_wait_for_superior(struct device *dev, bool async) { - dpm_wait(dev->parent, async); + struct device *parent; + + /* + * If the device is resumed asynchronously and the parent's callback + * deletes both the device and the parent itself, the parent object may + * be freed while this function is running, so avoid that by reference + * counting the parent once more unless the device has been deleted + * already (in which case return right away). + */ + mutex_lock(&dpm_list_mtx); + + if (!device_pm_initialized(dev)) { + mutex_unlock(&dpm_list_mtx); + return false; + } + + parent = get_device(dev->parent); + + mutex_unlock(&dpm_list_mtx); + + dpm_wait(parent, async); + put_device(parent); + dpm_wait_for_suppliers(dev, async); + + /* + * If the parent's callback has deleted the device, attempting to resume + * it would be invalid, so avoid doing that then. + */ + return device_pm_initialized(dev); } static void dpm_wait_for_consumers(struct device *dev, bool async) @@ -621,7 +649,8 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn if (!dev->power.is_noirq_suspended) goto Out; - dpm_wait_for_superior(dev, async); + if (!dpm_wait_for_superior(dev, async)) + goto Out; skip_resume = dev_pm_may_skip_resume(dev); @@ -829,7 +858,8 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn if (!dev->power.is_late_suspended) goto Out; - dpm_wait_for_superior(dev, async); + if (!dpm_wait_for_superior(dev, async)) + goto Out; callback = dpm_subsys_resume_early_cb(dev, state, &info); @@ -944,7 +974,9 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) goto Complete; } - dpm_wait_for_superior(dev, async); + if (!dpm_wait_for_superior(dev, async)) + goto Complete; + dpm_watchdog_set(&wd, dev); device_lock(dev); diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 4e7ef35f1c8f..9c3b063e1a1f 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2850,7 +2850,7 @@ static int btusb_mtk_setup_firmware(struct hci_dev *hdev, const char *fwname) err = btusb_mtk_hci_wmt_sync(hdev, &wmt_params); if (err < 0) { bt_dev_err(hdev, "Failed to send wmt rst (%d)", err); - return err; + goto err_release_fw; } /* Wait a few moments for firmware activation done */ @@ -3819,6 +3819,10 @@ static int btusb_probe(struct usb_interface *intf, * (DEVICE_REMOTE_WAKEUP) */ set_bit(BTUSB_WAKEUP_DISABLE, &data->flags); + + err = usb_autopm_get_interface(intf); + if (err < 0) + goto out_free_dev; } if (id->driver_info & BTUSB_AMP) { diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c index 1ed85f120a1b..49b9f2f85bad 100644 --- a/drivers/clk/tegra/clk-tegra-periph.c +++ b/drivers/clk/tegra/clk-tegra-periph.c @@ -785,7 +785,11 @@ static struct tegra_periph_init_data gate_clks[] = { GATE("ahbdma", "hclk", 33, 0, tegra_clk_ahbdma, 0), GATE("apbdma", "pclk", 34, 0, tegra_clk_apbdma, 0), GATE("kbc", "clk_32k", 36, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_kbc, 0), - GATE("fuse", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse, 0), + /* + * Critical for RAM re-repair operation, which must occur on resume + * from LP1 system suspend and as part of CCPLEX cluster switching. + */ + GATE("fuse", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse, CLK_IS_CRITICAL), GATE("fuse_burn", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse_burn, 0), GATE("kfuse", "clk_m", 40, TEGRA_PERIPH_ON_APB, tegra_clk_kfuse, 0), GATE("apbif", "clk_m", 107, TEGRA_PERIPH_ON_APB, tegra_clk_apbif, 0), diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 8d8da763adc5..8910fd1ae3c6 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -217,7 +217,7 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, return ret; } -static int cppc_verify_policy(struct cpufreq_policy *policy) +static int cppc_verify_policy(struct cpufreq_policy_data *policy) { cpufreq_verify_within_cpu_limits(policy); return 0; diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c index cd53272e2fa2..f7a7bcf6f52e 100644 --- a/drivers/cpufreq/cpufreq-nforce2.c +++ b/drivers/cpufreq/cpufreq-nforce2.c @@ -291,7 +291,7 @@ static int nforce2_target(struct cpufreq_policy *policy, * nforce2_verify - verifies a new CPUFreq policy * @policy: new policy */ -static int nforce2_verify(struct cpufreq_policy *policy) +static int nforce2_verify(struct cpufreq_policy_data *policy) { unsigned int fsb_pol_max; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a7db4f22a077..7679f8a91745 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -74,6 +74,9 @@ static void cpufreq_exit_governor(struct cpufreq_policy *policy); static int cpufreq_start_governor(struct cpufreq_policy *policy); static void cpufreq_stop_governor(struct cpufreq_policy *policy); static void cpufreq_governor_limits(struct cpufreq_policy *policy); +static int cpufreq_set_policy(struct cpufreq_policy *policy, + struct cpufreq_governor *new_gov, + unsigned int new_pol); /** * Two notifier lists: the "policy" list is involved in the @@ -613,25 +616,22 @@ static struct cpufreq_governor *find_governor(const char *str_governor) return NULL; } -static int cpufreq_parse_policy(char *str_governor, - struct cpufreq_policy *policy) +static unsigned int cpufreq_parse_policy(char *str_governor) { - if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) { - policy->policy = CPUFREQ_POLICY_PERFORMANCE; - return 0; - } - if (!strncasecmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) { - policy->policy = CPUFREQ_POLICY_POWERSAVE; - return 0; - } - return -EINVAL; + if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) + return CPUFREQ_POLICY_PERFORMANCE; + + if (!strncasecmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) + return CPUFREQ_POLICY_POWERSAVE; + + return CPUFREQ_POLICY_UNKNOWN; } /** * cpufreq_parse_governor - parse a governor string only for has_target() + * @str_governor: Governor name. */ -static int cpufreq_parse_governor(char *str_governor, - struct cpufreq_policy *policy) +static struct cpufreq_governor *cpufreq_parse_governor(char *str_governor) { struct cpufreq_governor *t; @@ -645,7 +645,7 @@ static int cpufreq_parse_governor(char *str_governor, ret = request_module("cpufreq_%s", str_governor); if (ret) - return -EINVAL; + return NULL; mutex_lock(&cpufreq_governor_mutex); @@ -656,12 +656,7 @@ static int cpufreq_parse_governor(char *str_governor, mutex_unlock(&cpufreq_governor_mutex); - if (t) { - policy->governor = t; - return 0; - } - - return -EINVAL; + return t; } /** @@ -762,28 +757,33 @@ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) static ssize_t store_scaling_governor(struct cpufreq_policy *policy, const char *buf, size_t count) { + char str_governor[16]; int ret; - char str_governor[16]; - struct cpufreq_policy new_policy; - - memcpy(&new_policy, policy, sizeof(*policy)); ret = sscanf(buf, "%15s", str_governor); if (ret != 1) return -EINVAL; if (cpufreq_driver->setpolicy) { - if (cpufreq_parse_policy(str_governor, &new_policy)) + unsigned int new_pol; + + new_pol = cpufreq_parse_policy(str_governor); + if (!new_pol) return -EINVAL; + + ret = cpufreq_set_policy(policy, NULL, new_pol); } else { - if (cpufreq_parse_governor(str_governor, &new_policy)) + struct cpufreq_governor *new_gov; + + new_gov = cpufreq_parse_governor(str_governor); + if (!new_gov) return -EINVAL; - } - ret = cpufreq_set_policy(policy, &new_policy); + ret = cpufreq_set_policy(policy, new_gov, + CPUFREQ_POLICY_UNKNOWN); - if (new_policy.governor) - module_put(new_policy.governor->owner); + module_put(new_gov->owner); + } return ret ? ret : count; } @@ -1050,40 +1050,33 @@ __weak struct cpufreq_governor *cpufreq_default_governor(void) static int cpufreq_init_policy(struct cpufreq_policy *policy) { - struct cpufreq_governor *gov = NULL, *def_gov = NULL; - struct cpufreq_policy new_policy; - - memcpy(&new_policy, policy, sizeof(*policy)); - - def_gov = cpufreq_default_governor(); + struct cpufreq_governor *def_gov = cpufreq_default_governor(); + struct cpufreq_governor *gov = NULL; + unsigned int pol = CPUFREQ_POLICY_UNKNOWN; if (has_target()) { - /* - * Update governor of new_policy to the governor used before - * hotplug - */ + /* Update policy governor to the one used before hotplug. */ gov = find_governor(policy->last_governor); if (gov) { pr_debug("Restoring governor %s for cpu %d\n", - policy->governor->name, policy->cpu); - } else { - if (!def_gov) - return -ENODATA; + policy->governor->name, policy->cpu); + } else if (def_gov) { gov = def_gov; + } else { + return -ENODATA; } - new_policy.governor = gov; } else { /* Use the default policy if there is no last_policy. */ if (policy->last_policy) { - new_policy.policy = policy->last_policy; + pol = policy->last_policy; + } else if (def_gov) { + pol = cpufreq_parse_policy(def_gov->name); } else { - if (!def_gov) - return -ENODATA; - cpufreq_parse_policy(def_gov->name, &new_policy); + return -ENODATA; } } - return cpufreq_set_policy(policy, &new_policy); + return cpufreq_set_policy(policy, gov, pol); } static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) @@ -1111,13 +1104,10 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp void refresh_frequency_limits(struct cpufreq_policy *policy) { - struct cpufreq_policy new_policy; - if (!policy_is_inactive(policy)) { - new_policy = *policy; pr_debug("updating policy for CPU %u\n", policy->cpu); - cpufreq_set_policy(policy, &new_policy); + cpufreq_set_policy(policy, policy->governor, policy->policy); } } EXPORT_SYMBOL(refresh_frequency_limits); @@ -2361,43 +2351,46 @@ EXPORT_SYMBOL(cpufreq_get_policy); /** * cpufreq_set_policy - Modify cpufreq policy parameters. * @policy: Policy object to modify. - * @new_policy: New policy data. + * @new_gov: Policy governor pointer. + * @new_pol: Policy value (for drivers with built-in governors). * - * Pass @new_policy to the cpufreq driver's ->verify() callback. Next, copy the - * min and max parameters of @new_policy to @policy and either invoke the - * driver's ->setpolicy() callback (if present) or carry out a governor update - * for @policy. That is, run the current governor's ->limits() callback (if the - * governor field in @new_policy points to the same object as the one in - * @policy) or replace the governor for @policy with the new one stored in - * @new_policy. + * Invoke the cpufreq driver's ->verify() callback to sanity-check the frequency + * limits to be set for the policy, update @policy with the verified limits + * values and either invoke the driver's ->setpolicy() callback (if present) or + * carry out a governor update for @policy. That is, run the current governor's + * ->limits() callback (if @new_gov points to the same object as the one in + * @policy) or replace the governor for @policy with @new_gov. * * The cpuinfo part of @policy is not updated by this function. */ -int cpufreq_set_policy(struct cpufreq_policy *policy, - struct cpufreq_policy *new_policy) +static int cpufreq_set_policy(struct cpufreq_policy *policy, + struct cpufreq_governor *new_gov, + unsigned int new_pol) { + struct cpufreq_policy_data new_data; struct cpufreq_governor *old_gov; int ret; - pr_debug("setting new policy for CPU %u: %u - %u kHz\n", - new_policy->cpu, new_policy->min, new_policy->max); - - memcpy(&new_policy->cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo)); - + memcpy(&new_data.cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo)); + new_data.freq_table = policy->freq_table; + new_data.cpu = policy->cpu; /* * PM QoS framework collects all the requests from users and provide us * the final aggregated value here. */ - new_policy->min = freq_qos_read_value(&policy->constraints, FREQ_QOS_MIN); - new_policy->max = freq_qos_read_value(&policy->constraints, FREQ_QOS_MAX); + new_data.min = freq_qos_read_value(&policy->constraints, FREQ_QOS_MIN); + new_data.max = freq_qos_read_value(&policy->constraints, FREQ_QOS_MAX); + + pr_debug("setting new policy for CPU %u: %u - %u kHz\n", + new_data.cpu, new_data.min, new_data.max); /* verify the cpu speed can be set within this limit */ - ret = cpufreq_driver->verify(new_policy); + ret = cpufreq_driver->verify(&new_data); if (ret) return ret; - policy->min = new_policy->min; - policy->max = new_policy->max; + policy->min = new_data.min; + policy->max = new_data.max; trace_cpu_frequency_limits(policy); policy->cached_target_freq = UINT_MAX; @@ -2406,12 +2399,12 @@ int cpufreq_set_policy(struct cpufreq_policy *policy, policy->min, policy->max); if (cpufreq_driver->setpolicy) { - policy->policy = new_policy->policy; + policy->policy = new_pol; pr_debug("setting range\n"); return cpufreq_driver->setpolicy(policy); } - if (new_policy->governor == policy->governor) { + if (new_gov == policy->governor) { pr_debug("governor limits update\n"); cpufreq_governor_limits(policy); return 0; @@ -2428,7 +2421,7 @@ int cpufreq_set_policy(struct cpufreq_policy *policy, } /* start new governor */ - policy->governor = new_policy->governor; + policy->governor = new_gov; ret = cpufreq_init_governor(policy); if (!ret) { ret = cpufreq_start_governor(policy); diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index ded427e0a488..e117b0059123 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -60,7 +60,7 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, return 0; } -int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, +int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, struct cpufreq_frequency_table *table) { struct cpufreq_frequency_table *pos; @@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(cpufreq_frequency_table_verify); * Generic routine to verify policy & frequency table, requires driver to set * policy->freq_table prior to it. */ -int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy) +int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy) { if (!policy->freq_table) return -ENODEV; diff --git a/drivers/cpufreq/gx-suspmod.c b/drivers/cpufreq/gx-suspmod.c index e97b5733aa24..75b3ef7ec679 100644 --- a/drivers/cpufreq/gx-suspmod.c +++ b/drivers/cpufreq/gx-suspmod.c @@ -328,7 +328,7 @@ static void gx_set_cpuspeed(struct cpufreq_policy *policy, unsigned int khz) * for the hardware supported by the driver. */ -static int cpufreq_gx_verify(struct cpufreq_policy *policy) +static int cpufreq_gx_verify(struct cpufreq_policy_data *policy) { unsigned int tmp_freq = 0; u8 tmp1, tmp2; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 8ab31702cf6a..45499e0b9f2f 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2036,8 +2036,9 @@ static int intel_pstate_get_max_freq(struct cpudata *cpu) cpu->pstate.max_freq : cpu->pstate.turbo_freq; } -static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, - struct cpudata *cpu) +static void intel_pstate_update_perf_limits(struct cpudata *cpu, + unsigned int policy_min, + unsigned int policy_max) { int max_freq = intel_pstate_get_max_freq(cpu); int32_t max_policy_perf, min_policy_perf; @@ -2056,18 +2057,17 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, turbo_max = cpu->pstate.turbo_pstate; } - max_policy_perf = max_state * policy->max / max_freq; - if (policy->max == policy->min) { + max_policy_perf = max_state * policy_max / max_freq; + if (policy_max == policy_min) { min_policy_perf = max_policy_perf; } else { - min_policy_perf = max_state * policy->min / max_freq; + min_policy_perf = max_state * policy_min / max_freq; min_policy_perf = clamp_t(int32_t, min_policy_perf, 0, max_policy_perf); } pr_debug("cpu:%d max_state %d min_policy_perf:%d max_policy_perf:%d\n", - policy->cpu, max_state, - min_policy_perf, max_policy_perf); + cpu->cpu, max_state, min_policy_perf, max_policy_perf); /* Normalize user input to [min_perf, max_perf] */ if (per_cpu_limits) { @@ -2081,7 +2081,7 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, global_min = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100); global_min = clamp_t(int32_t, global_min, 0, global_max); - pr_debug("cpu:%d global_min:%d global_max:%d\n", policy->cpu, + pr_debug("cpu:%d global_min:%d global_max:%d\n", cpu->cpu, global_min, global_max); cpu->min_perf_ratio = max(min_policy_perf, global_min); @@ -2094,7 +2094,7 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, cpu->max_perf_ratio); } - pr_debug("cpu:%d max_perf_ratio:%d min_perf_ratio:%d\n", policy->cpu, + pr_debug("cpu:%d max_perf_ratio:%d min_perf_ratio:%d\n", cpu->cpu, cpu->max_perf_ratio, cpu->min_perf_ratio); } @@ -2114,7 +2114,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) mutex_lock(&intel_pstate_limits_lock); - intel_pstate_update_perf_limits(policy, cpu); + intel_pstate_update_perf_limits(cpu, policy->min, policy->max); if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { /* @@ -2143,8 +2143,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) return 0; } -static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy, - struct cpudata *cpu) +static void intel_pstate_adjust_policy_max(struct cpudata *cpu, + struct cpufreq_policy_data *policy) { if (!hwp_active && cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate && @@ -2155,7 +2155,7 @@ static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy, } } -static int intel_pstate_verify_policy(struct cpufreq_policy *policy) +static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy) { struct cpudata *cpu = all_cpu_data[policy->cpu]; @@ -2163,11 +2163,7 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy) cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, intel_pstate_get_max_freq(cpu)); - if (policy->policy != CPUFREQ_POLICY_POWERSAVE && - policy->policy != CPUFREQ_POLICY_PERFORMANCE) - return -EINVAL; - - intel_pstate_adjust_policy_max(policy, cpu); + intel_pstate_adjust_policy_max(cpu, policy); return 0; } @@ -2268,7 +2264,7 @@ static struct cpufreq_driver intel_pstate = { .name = "intel_pstate", }; -static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) +static int intel_cpufreq_verify_policy(struct cpufreq_policy_data *policy) { struct cpudata *cpu = all_cpu_data[policy->cpu]; @@ -2276,9 +2272,9 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, intel_pstate_get_max_freq(cpu)); - intel_pstate_adjust_policy_max(policy, cpu); + intel_pstate_adjust_policy_max(cpu, policy); - intel_pstate_update_perf_limits(policy, cpu); + intel_pstate_update_perf_limits(cpu, policy->min, policy->max); return 0; } diff --git a/drivers/cpufreq/longrun.c b/drivers/cpufreq/longrun.c index 64b8689f7a4a..0b08be8bff76 100644 --- a/drivers/cpufreq/longrun.c +++ b/drivers/cpufreq/longrun.c @@ -122,7 +122,7 @@ static int longrun_set_policy(struct cpufreq_policy *policy) * Validates a new CPUFreq policy. This function has to be called with * cpufreq_driver locked. */ -static int longrun_verify_policy(struct cpufreq_policy *policy) +static int longrun_verify_policy(struct cpufreq_policy_data *policy) { if (!policy) return -EINVAL; @@ -130,10 +130,6 @@ static int longrun_verify_policy(struct cpufreq_policy *policy) policy->cpu = 0; cpufreq_verify_within_cpu_limits(policy); - if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) && - (policy->policy != CPUFREQ_POLICY_PERFORMANCE)) - return -EINVAL; - return 0; } diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c index fdc767fdbe6a..f90273006553 100644 --- a/drivers/cpufreq/pcc-cpufreq.c +++ b/drivers/cpufreq/pcc-cpufreq.c @@ -109,7 +109,7 @@ struct pcc_cpu { static struct pcc_cpu __percpu *pcc_cpu_info; -static int pcc_cpufreq_verify(struct cpufreq_policy *policy) +static int pcc_cpufreq_verify(struct cpufreq_policy_data *policy) { cpufreq_verify_within_cpu_limits(policy); return 0; diff --git a/drivers/cpufreq/sh-cpufreq.c b/drivers/cpufreq/sh-cpufreq.c index 5096c0ab781b..0ac265d47ef0 100644 --- a/drivers/cpufreq/sh-cpufreq.c +++ b/drivers/cpufreq/sh-cpufreq.c @@ -87,7 +87,7 @@ static int sh_cpufreq_target(struct cpufreq_policy *policy, return work_on_cpu(policy->cpu, __sh_cpufreq_target, &data); } -static int sh_cpufreq_verify(struct cpufreq_policy *policy) +static int sh_cpufreq_verify(struct cpufreq_policy_data *policy) { struct clk *cpuclk = &per_cpu(sh_cpuclk, policy->cpu); struct cpufreq_frequency_table *freq_table; diff --git a/drivers/cpufreq/unicore2-cpufreq.c b/drivers/cpufreq/unicore2-cpufreq.c index 707dbc1b7ac8..98d392196df2 100644 --- a/drivers/cpufreq/unicore2-cpufreq.c +++ b/drivers/cpufreq/unicore2-cpufreq.c @@ -22,7 +22,7 @@ static struct cpufreq_driver ucv2_driver; /* make sure that only the "userspace" governor is run * -- anything else wouldn't make sense on this platform, anyway. */ -static int ucv2_verify_speed(struct cpufreq_policy *policy) +static int ucv2_verify_speed(struct cpufreq_policy_data *policy) { if (policy->cpu) return -EINVAL; diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index db99cee1991c..89f79d763ab8 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -88,7 +88,6 @@ struct atmel_aes_caps { bool has_dualbuff; bool has_cfb64; - bool has_ctr32; bool has_gcm; bool has_xts; bool has_authenc; @@ -1013,8 +1012,9 @@ static int atmel_aes_ctr_transfer(struct atmel_aes_dev *dd) struct atmel_aes_ctr_ctx *ctx = atmel_aes_ctr_ctx_cast(dd->ctx); struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq); struct scatterlist *src, *dst; - u32 ctr, blocks; size_t datalen; + u32 ctr; + u16 blocks, start, end; bool use_dma, fragmented = false; /* Check for transfer completion. */ @@ -1026,27 +1026,17 @@ static int atmel_aes_ctr_transfer(struct atmel_aes_dev *dd) datalen = req->nbytes - ctx->offset; blocks = DIV_ROUND_UP(datalen, AES_BLOCK_SIZE); ctr = be32_to_cpu(ctx->iv[3]); - if (dd->caps.has_ctr32) { - /* Check 32bit counter overflow. */ - u32 start = ctr; - u32 end = start + blocks - 1; - - if (end < start) { - ctr |= 0xffffffff; - datalen = AES_BLOCK_SIZE * -start; - fragmented = true; - } - } else { - /* Check 16bit counter overflow. */ - u16 start = ctr & 0xffff; - u16 end = start + (u16)blocks - 1; - - if (blocks >> 16 || end < start) { - ctr |= 0xffff; - datalen = AES_BLOCK_SIZE * (0x10000-start); - fragmented = true; - } + + /* Check 16bit counter overflow. */ + start = ctr & 0xffff; + end = start + blocks - 1; + + if (blocks >> 16 || end < start) { + ctr |= 0xffff; + datalen = AES_BLOCK_SIZE * (0x10000 - start); + fragmented = true; } + use_dma = (datalen >= ATMEL_AES_DMA_THRESHOLD); /* Jump to offset. */ @@ -2550,7 +2540,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) { dd->caps.has_dualbuff = 0; dd->caps.has_cfb64 = 0; - dd->caps.has_ctr32 = 0; dd->caps.has_gcm = 0; dd->caps.has_xts = 0; dd->caps.has_authenc = 0; @@ -2561,7 +2550,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) case 0x500: dd->caps.has_dualbuff = 1; dd->caps.has_cfb64 = 1; - dd->caps.has_ctr32 = 1; dd->caps.has_gcm = 1; dd->caps.has_xts = 1; dd->caps.has_authenc = 1; @@ -2570,7 +2558,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) case 0x200: dd->caps.has_dualbuff = 1; dd->caps.has_cfb64 = 1; - dd->caps.has_ctr32 = 1; dd->caps.has_gcm = 1; dd->caps.max_burst_size = 4; break; diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c index 0186b3df4c87..0d5576f6ad21 100644 --- a/drivers/crypto/ccp/ccp-dev-v3.c +++ b/drivers/crypto/ccp/ccp-dev-v3.c @@ -586,6 +586,7 @@ const struct ccp_vdata ccpv3_platform = { .setup = NULL, .perform = &ccp3_actions, .offset = 0, + .rsamax = CCP_RSA_MAX_WIDTH, }; const struct ccp_vdata ccpv3 = { diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index d3e8faa03f15..3d7c8d9e54b9 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -237,7 +237,7 @@ static void cc_aead_complete(struct device *dev, void *cc_req, int err) * revealed the decrypted message --> zero its memory. */ sg_zero_buffer(areq->dst, sg_nents(areq->dst), - areq->cryptlen, 0); + areq->cryptlen, areq->assoclen); err = -EBADMSG; } /*ENCRYPT*/ diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index 254b48797799..cd9c60268bf8 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -523,6 +523,7 @@ static void cc_setup_readiv_desc(struct crypto_tfm *tfm, } } + static void cc_setup_state_desc(struct crypto_tfm *tfm, struct cipher_req_ctx *req_ctx, unsigned int ivsize, unsigned int nbytes, @@ -534,8 +535,6 @@ static void cc_setup_state_desc(struct crypto_tfm *tfm, int cipher_mode = ctx_p->cipher_mode; int flow_mode = ctx_p->flow_mode; int direction = req_ctx->gen_ctx.op_type; - dma_addr_t key_dma_addr = ctx_p->user.key_dma_addr; - unsigned int key_len = ctx_p->keylen; dma_addr_t iv_dma_addr = req_ctx->gen_ctx.iv_dma_addr; unsigned int du_size = nbytes; @@ -570,6 +569,47 @@ static void cc_setup_state_desc(struct crypto_tfm *tfm, break; case DRV_CIPHER_XTS: case DRV_CIPHER_ESSIV: + case DRV_CIPHER_BITLOCKER: + break; + default: + dev_err(dev, "Unsupported cipher mode (%d)\n", cipher_mode); + } +} + + +static void cc_setup_xex_state_desc(struct crypto_tfm *tfm, + struct cipher_req_ctx *req_ctx, + unsigned int ivsize, unsigned int nbytes, + struct cc_hw_desc desc[], + unsigned int *seq_size) +{ + struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm); + struct device *dev = drvdata_to_dev(ctx_p->drvdata); + int cipher_mode = ctx_p->cipher_mode; + int flow_mode = ctx_p->flow_mode; + int direction = req_ctx->gen_ctx.op_type; + dma_addr_t key_dma_addr = ctx_p->user.key_dma_addr; + unsigned int key_len = ctx_p->keylen; + dma_addr_t iv_dma_addr = req_ctx->gen_ctx.iv_dma_addr; + unsigned int du_size = nbytes; + + struct cc_crypto_alg *cc_alg = + container_of(tfm->__crt_alg, struct cc_crypto_alg, + skcipher_alg.base); + + if (cc_alg->data_unit) + du_size = cc_alg->data_unit; + + switch (cipher_mode) { + case DRV_CIPHER_ECB: + break; + case DRV_CIPHER_CBC: + case DRV_CIPHER_CBC_CTS: + case DRV_CIPHER_CTR: + case DRV_CIPHER_OFB: + break; + case DRV_CIPHER_XTS: + case DRV_CIPHER_ESSIV: case DRV_CIPHER_BITLOCKER: /* load XEX key */ hw_desc_init(&desc[*seq_size]); @@ -881,12 +921,14 @@ static int cc_cipher_process(struct skcipher_request *req, /* STAT_PHASE_2: Create sequence */ - /* Setup IV and XEX key used */ + /* Setup state (IV) */ cc_setup_state_desc(tfm, req_ctx, ivsize, nbytes, desc, &seq_len); /* Setup MLLI line, if needed */ cc_setup_mlli_desc(tfm, req_ctx, dst, src, nbytes, req, desc, &seq_len); /* Setup key */ cc_setup_key_desc(tfm, req_ctx, nbytes, desc, &seq_len); + /* Setup state (IV and XEX key) */ + cc_setup_xex_state_desc(tfm, req_ctx, ivsize, nbytes, desc, &seq_len); /* Data processing */ cc_setup_flow_desc(tfm, req_ctx, dst, src, nbytes, desc, &seq_len); /* Read next IV */ diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h index ab31d4a68c80..7d2f7e2c0bb5 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -161,6 +161,7 @@ struct cc_drvdata { int std_bodies; bool sec_disabled; u32 comp_mask; + bool pm_on; }; struct cc_crypto_alg { diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c index dbc508fb719b..452bd77a9ba0 100644 --- a/drivers/crypto/ccree/cc_pm.c +++ b/drivers/crypto/ccree/cc_pm.c @@ -22,14 +22,8 @@ const struct dev_pm_ops ccree_pm = { int cc_pm_suspend(struct device *dev) { struct cc_drvdata *drvdata = dev_get_drvdata(dev); - int rc; dev_dbg(dev, "set HOST_POWER_DOWN_EN\n"); - rc = cc_suspend_req_queue(drvdata); - if (rc) { - dev_err(dev, "cc_suspend_req_queue (%x)\n", rc); - return rc; - } fini_cc_regs(drvdata); cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_ENABLE); cc_clk_off(drvdata); @@ -63,13 +57,6 @@ int cc_pm_resume(struct device *dev) /* check if tee fips error occurred during power down */ cc_tee_handle_fips_error(drvdata); - rc = cc_resume_req_queue(drvdata); - if (rc) { - dev_err(dev, "cc_resume_req_queue (%x)\n", rc); - return rc; - } - - /* must be after the queue resuming as it uses the HW queue*/ cc_init_hash_sram(drvdata); return 0; @@ -80,12 +67,10 @@ int cc_pm_get(struct device *dev) int rc = 0; struct cc_drvdata *drvdata = dev_get_drvdata(dev); - if (cc_req_queue_suspended(drvdata)) + if (drvdata->pm_on) rc = pm_runtime_get_sync(dev); - else - pm_runtime_get_noresume(dev); - return rc; + return (rc == 1 ? 0 : rc); } int cc_pm_put_suspend(struct device *dev) @@ -93,14 +78,11 @@ int cc_pm_put_suspend(struct device *dev) int rc = 0; struct cc_drvdata *drvdata = dev_get_drvdata(dev); - if (!cc_req_queue_suspended(drvdata)) { + if (drvdata->pm_on) { pm_runtime_mark_last_busy(dev); rc = pm_runtime_put_autosuspend(dev); - } else { - /* Something wrong happens*/ - dev_err(dev, "request to suspend already suspended queue"); - rc = -EBUSY; } + return rc; } @@ -117,7 +99,7 @@ int cc_pm_init(struct cc_drvdata *drvdata) /* must be before the enabling to avoid resdundent suspending */ pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT); pm_runtime_use_autosuspend(dev); - /* activate the PM module */ + /* set us as active - note we won't do PM ops until cc_pm_go()! */ return pm_runtime_set_active(dev); } @@ -125,9 +107,11 @@ int cc_pm_init(struct cc_drvdata *drvdata) void cc_pm_go(struct cc_drvdata *drvdata) { pm_runtime_enable(drvdata_to_dev(drvdata)); + drvdata->pm_on = true; } void cc_pm_fini(struct cc_drvdata *drvdata) { pm_runtime_disable(drvdata_to_dev(drvdata)); + drvdata->pm_on = false; } diff --git a/drivers/crypto/ccree/cc_request_mgr.c b/drivers/crypto/ccree/cc_request_mgr.c index a947d5a2cf35..37e6fee37b13 100644 --- a/drivers/crypto/ccree/cc_request_mgr.c +++ b/drivers/crypto/ccree/cc_request_mgr.c @@ -41,7 +41,6 @@ struct cc_req_mgr_handle { #else struct tasklet_struct comptask; #endif - bool is_runtime_suspended; }; struct cc_bl_item { @@ -404,6 +403,7 @@ static void cc_proc_backlog(struct cc_drvdata *drvdata) spin_lock(&mgr->bl_lock); list_del(&bli->list); --mgr->bl_len; + kfree(bli); } spin_unlock(&mgr->bl_lock); @@ -677,52 +677,3 @@ static void comp_handler(unsigned long devarg) cc_proc_backlog(drvdata); dev_dbg(dev, "Comp. handler done.\n"); } - -/* - * resume the queue configuration - no need to take the lock as this happens - * inside the spin lock protection - */ -#if defined(CONFIG_PM) -int cc_resume_req_queue(struct cc_drvdata *drvdata) -{ - struct cc_req_mgr_handle *request_mgr_handle = - drvdata->request_mgr_handle; - - spin_lock_bh(&request_mgr_handle->hw_lock); - request_mgr_handle->is_runtime_suspended = false; - spin_unlock_bh(&request_mgr_handle->hw_lock); - - return 0; -} - -/* - * suspend the queue configuration. Since it is used for the runtime suspend - * only verify that the queue can be suspended. - */ -int cc_suspend_req_queue(struct cc_drvdata *drvdata) -{ - struct cc_req_mgr_handle *request_mgr_handle = - drvdata->request_mgr_handle; - - /* lock the send_request */ - spin_lock_bh(&request_mgr_handle->hw_lock); - if (request_mgr_handle->req_queue_head != - request_mgr_handle->req_queue_tail) { - spin_unlock_bh(&request_mgr_handle->hw_lock); - return -EBUSY; - } - request_mgr_handle->is_runtime_suspended = true; - spin_unlock_bh(&request_mgr_handle->hw_lock); - - return 0; -} - -bool cc_req_queue_suspended(struct cc_drvdata *drvdata) -{ - struct cc_req_mgr_handle *request_mgr_handle = - drvdata->request_mgr_handle; - - return request_mgr_handle->is_runtime_suspended; -} - -#endif diff --git a/drivers/crypto/ccree/cc_request_mgr.h b/drivers/crypto/ccree/cc_request_mgr.h index f46cf766fe4d..ff7746aaaf35 100644 --- a/drivers/crypto/ccree/cc_request_mgr.h +++ b/drivers/crypto/ccree/cc_request_mgr.h @@ -40,12 +40,4 @@ void complete_request(struct cc_drvdata *drvdata); void cc_req_mgr_fini(struct cc_drvdata *drvdata); -#if defined(CONFIG_PM) -int cc_resume_req_queue(struct cc_drvdata *drvdata); - -int cc_suspend_req_queue(struct cc_drvdata *drvdata); - -bool cc_req_queue_suspended(struct cc_drvdata *drvdata); -#endif - #endif /*__REQUEST_MGR_H__*/ diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig index 504daff7687d..f7f0a1fb6895 100644 --- a/drivers/crypto/hisilicon/Kconfig +++ b/drivers/crypto/hisilicon/Kconfig @@ -35,6 +35,5 @@ config CRYPTO_DEV_HISI_ZIP depends on ARM64 && PCI && PCI_MSI select CRYPTO_DEV_HISI_QM select CRYPTO_HISI_SGL - select SG_SPLIT help Support for HiSilicon ZIP Driver diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h index ffb00d987d02..99f21d848d4f 100644 --- a/drivers/crypto/hisilicon/zip/zip.h +++ b/drivers/crypto/hisilicon/zip/zip.h @@ -12,6 +12,10 @@ /* hisi_zip_sqe dw3 */ #define HZIP_BD_STATUS_M GENMASK(7, 0) +/* hisi_zip_sqe dw7 */ +#define HZIP_IN_SGE_DATA_OFFSET_M GENMASK(23, 0) +/* hisi_zip_sqe dw8 */ +#define HZIP_OUT_SGE_DATA_OFFSET_M GENMASK(23, 0) /* hisi_zip_sqe dw9 */ #define HZIP_REQ_TYPE_M GENMASK(7, 0) #define HZIP_ALG_TYPE_ZLIB 0x02 diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c index 59023545a1c4..cf34bfdfb3e6 100644 --- a/drivers/crypto/hisilicon/zip/zip_crypto.c +++ b/drivers/crypto/hisilicon/zip/zip_crypto.c @@ -45,10 +45,8 @@ enum hisi_zip_alg_type { struct hisi_zip_req { struct acomp_req *req; - struct scatterlist *src; - struct scatterlist *dst; - size_t slen; - size_t dlen; + int sskip; + int dskip; struct hisi_acc_hw_sgl *hw_src; struct hisi_acc_hw_sgl *hw_dst; dma_addr_t dma_src; @@ -94,13 +92,15 @@ static void hisi_zip_config_tag(struct hisi_zip_sqe *sqe, u32 tag) static void hisi_zip_fill_sqe(struct hisi_zip_sqe *sqe, u8 req_type, dma_addr_t s_addr, dma_addr_t d_addr, u32 slen, - u32 dlen) + u32 dlen, int sskip, int dskip) { memset(sqe, 0, sizeof(struct hisi_zip_sqe)); - sqe->input_data_length = slen; + sqe->input_data_length = slen - sskip; + sqe->dw7 = FIELD_PREP(HZIP_IN_SGE_DATA_OFFSET_M, sskip); + sqe->dw8 = FIELD_PREP(HZIP_OUT_SGE_DATA_OFFSET_M, dskip); sqe->dw9 = FIELD_PREP(HZIP_REQ_TYPE_M, req_type); - sqe->dest_avail_out = dlen; + sqe->dest_avail_out = dlen - dskip; sqe->source_addr_l = lower_32_bits(s_addr); sqe->source_addr_h = upper_32_bits(s_addr); sqe->dest_addr_l = lower_32_bits(d_addr); @@ -301,11 +301,6 @@ static void hisi_zip_remove_req(struct hisi_zip_qp_ctx *qp_ctx, { struct hisi_zip_req_q *req_q = &qp_ctx->req_q; - if (qp_ctx->qp->alg_type == HZIP_ALG_TYPE_COMP) - kfree(req->dst); - else - kfree(req->src); - write_lock(&req_q->req_lock); clear_bit(req->req_id, req_q->req_bitmap); memset(req, 0, sizeof(struct hisi_zip_req)); @@ -333,8 +328,8 @@ static void hisi_zip_acomp_cb(struct hisi_qp *qp, void *data) } dlen = sqe->produced; - hisi_acc_sg_buf_unmap(dev, req->src, req->hw_src); - hisi_acc_sg_buf_unmap(dev, req->dst, req->hw_dst); + hisi_acc_sg_buf_unmap(dev, acomp_req->src, req->hw_src); + hisi_acc_sg_buf_unmap(dev, acomp_req->dst, req->hw_dst); head_size = (qp->alg_type == 0) ? TO_HEAD_SIZE(qp->req_type) : 0; acomp_req->dlen = dlen + head_size; @@ -428,20 +423,6 @@ static size_t get_comp_head_size(struct scatterlist *src, u8 req_type) } } -static int get_sg_skip_bytes(struct scatterlist *sgl, size_t bytes, - size_t remains, struct scatterlist **out) -{ -#define SPLIT_NUM 2 - size_t split_sizes[SPLIT_NUM]; - int out_mapped_nents[SPLIT_NUM]; - - split_sizes[0] = bytes; - split_sizes[1] = remains; - - return sg_split(sgl, 0, 0, SPLIT_NUM, split_sizes, out, - out_mapped_nents, GFP_KERNEL); -} - static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req, struct hisi_zip_qp_ctx *qp_ctx, size_t head_size, bool is_comp) @@ -449,31 +430,7 @@ static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req, struct hisi_zip_req_q *req_q = &qp_ctx->req_q; struct hisi_zip_req *q = req_q->q; struct hisi_zip_req *req_cache; - struct scatterlist *out[2]; - struct scatterlist *sgl; - size_t len; - int ret, req_id; - - /* - * remove/add zlib/gzip head, as hardware operations do not include - * comp head. so split req->src to get sgl without heads in acomp, or - * add comp head to req->dst ahead of that hardware output compressed - * data in sgl splited from req->dst without comp head. - */ - if (is_comp) { - sgl = req->dst; - len = req->dlen - head_size; - } else { - sgl = req->src; - len = req->slen - head_size; - } - - ret = get_sg_skip_bytes(sgl, head_size, len, out); - if (ret) - return ERR_PTR(ret); - - /* sgl for comp head is useless, so free it now */ - kfree(out[0]); + int req_id; write_lock(&req_q->req_lock); @@ -481,7 +438,6 @@ static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req, if (req_id >= req_q->size) { write_unlock(&req_q->req_lock); dev_dbg(&qp_ctx->qp->qm->pdev->dev, "req cache is full!\n"); - kfree(out[1]); return ERR_PTR(-EBUSY); } set_bit(req_id, req_q->req_bitmap); @@ -489,16 +445,13 @@ static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req, req_cache = q + req_id; req_cache->req_id = req_id; req_cache->req = req; + if (is_comp) { - req_cache->src = req->src; - req_cache->dst = out[1]; - req_cache->slen = req->slen; - req_cache->dlen = req->dlen - head_size; + req_cache->sskip = 0; + req_cache->dskip = head_size; } else { - req_cache->src = out[1]; - req_cache->dst = req->dst; - req_cache->slen = req->slen - head_size; - req_cache->dlen = req->dlen; + req_cache->sskip = head_size; + req_cache->dskip = 0; } write_unlock(&req_q->req_lock); @@ -510,6 +463,7 @@ static int hisi_zip_do_work(struct hisi_zip_req *req, struct hisi_zip_qp_ctx *qp_ctx) { struct hisi_zip_sqe *zip_sqe = &qp_ctx->zip_sqe; + struct acomp_req *a_req = req->req; struct hisi_qp *qp = qp_ctx->qp; struct device *dev = &qp->qm->pdev->dev; struct hisi_acc_sgl_pool *pool = &qp_ctx->sgl_pool; @@ -517,16 +471,16 @@ static int hisi_zip_do_work(struct hisi_zip_req *req, dma_addr_t output; int ret; - if (!req->src || !req->slen || !req->dst || !req->dlen) + if (!a_req->src || !a_req->slen || !a_req->dst || !a_req->dlen) return -EINVAL; - req->hw_src = hisi_acc_sg_buf_map_to_hw_sgl(dev, req->src, pool, + req->hw_src = hisi_acc_sg_buf_map_to_hw_sgl(dev, a_req->src, pool, req->req_id << 1, &input); if (IS_ERR(req->hw_src)) return PTR_ERR(req->hw_src); req->dma_src = input; - req->hw_dst = hisi_acc_sg_buf_map_to_hw_sgl(dev, req->dst, pool, + req->hw_dst = hisi_acc_sg_buf_map_to_hw_sgl(dev, a_req->dst, pool, (req->req_id << 1) + 1, &output); if (IS_ERR(req->hw_dst)) { @@ -535,8 +489,8 @@ static int hisi_zip_do_work(struct hisi_zip_req *req, } req->dma_dst = output; - hisi_zip_fill_sqe(zip_sqe, qp->req_type, input, output, req->slen, - req->dlen); + hisi_zip_fill_sqe(zip_sqe, qp->req_type, input, output, a_req->slen, + a_req->dlen, req->sskip, req->dskip); hisi_zip_config_buf_type(zip_sqe, HZIP_SGL); hisi_zip_config_tag(zip_sqe, req->req_id); @@ -548,9 +502,9 @@ static int hisi_zip_do_work(struct hisi_zip_req *req, return -EINPROGRESS; err_unmap_output: - hisi_acc_sg_buf_unmap(dev, req->dst, req->hw_dst); + hisi_acc_sg_buf_unmap(dev, a_req->dst, req->hw_dst); err_unmap_input: - hisi_acc_sg_buf_unmap(dev, req->src, req->hw_src); + hisi_acc_sg_buf_unmap(dev, a_req->src, req->hw_src); return ret; } diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index 3cbefb41b099..2680e1525db5 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -1613,6 +1613,11 @@ static const struct of_device_id spacc_of_id_table[] = { MODULE_DEVICE_TABLE(of, spacc_of_id_table); #endif /* CONFIG_OF */ +static void spacc_tasklet_kill(void *data) +{ + tasklet_kill(data); +} + static int spacc_probe(struct platform_device *pdev) { int i, err, ret; @@ -1655,6 +1660,14 @@ static int spacc_probe(struct platform_device *pdev) return -ENXIO; } + tasklet_init(&engine->complete, spacc_spacc_complete, + (unsigned long)engine); + + ret = devm_add_action(&pdev->dev, spacc_tasklet_kill, + &engine->complete); + if (ret) + return ret; + if (devm_request_irq(&pdev->dev, irq->start, spacc_spacc_irq, 0, engine->name, engine)) { dev_err(engine->dev, "failed to request IRQ\n"); @@ -1712,8 +1725,6 @@ static int spacc_probe(struct platform_device *pdev) INIT_LIST_HEAD(&engine->completed); INIT_LIST_HEAD(&engine->in_progress); engine->in_flight = 0; - tasklet_init(&engine->complete, spacc_spacc_complete, - (unsigned long)engine); platform_set_drvdata(pdev, engine); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index ee1dc75f5ddc..1d733b57e60f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -247,7 +247,8 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( drm_dp_mst_reset_vcpi_slots(mst_mgr, mst_port); } - ret = drm_dp_update_payload_part1(mst_mgr); + /* It's OK for this to fail */ + drm_dp_update_payload_part1(mst_mgr); /* mst_mgr->->payloads are VC payload notify MST branch using DPCD or * AUX message. The sequence is slot 1-63 allocated sequence for each @@ -256,9 +257,6 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( get_payload_table(aconnector, proposed_table); - if (ret) - return false; - return true; } @@ -316,7 +314,6 @@ bool dm_helpers_dp_mst_send_payload_allocation( struct amdgpu_dm_connector *aconnector; struct drm_dp_mst_topology_mgr *mst_mgr; struct drm_dp_mst_port *mst_port; - int ret; aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; @@ -330,10 +327,8 @@ bool dm_helpers_dp_mst_send_payload_allocation( if (!mst_mgr->mst_state) return false; - ret = drm_dp_update_payload_part2(mst_mgr); - - if (ret) - return false; + /* It's OK for this to fail */ + drm_dp_update_payload_part2(mst_mgr); if (!enable) drm_dp_mst_deallocate_vcpi(mst_mgr, mst_port); diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c index f2e73e6d46b8..10985134ce0b 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c @@ -73,7 +73,11 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) unsigned long prate; unsigned int mask = ATMEL_HLCDC_CLKDIV_MASK | ATMEL_HLCDC_CLKPOL; unsigned int cfg = 0; - int div; + int div, ret; + + ret = clk_prepare_enable(crtc->dc->hlcdc->sys_clk); + if (ret) + return; vm.vfront_porch = adj->crtc_vsync_start - adj->crtc_vdisplay; vm.vback_porch = adj->crtc_vtotal - adj->crtc_vsync_end; @@ -95,14 +99,14 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) (adj->crtc_hdisplay - 1) | ((adj->crtc_vdisplay - 1) << 16)); + prate = clk_get_rate(crtc->dc->hlcdc->sys_clk); + mode_rate = adj->crtc_clock * 1000; if (!crtc->dc->desc->fixed_clksrc) { + prate *= 2; cfg |= ATMEL_HLCDC_CLKSEL; mask |= ATMEL_HLCDC_CLKSEL; } - prate = 2 * clk_get_rate(crtc->dc->hlcdc->sys_clk); - mode_rate = adj->crtc_clock * 1000; - div = DIV_ROUND_UP(prate, mode_rate); if (div < 2) { div = 2; @@ -117,8 +121,8 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) int div_low = prate / mode_rate; if (div_low >= 2 && - ((prate / div_low - mode_rate) < - 10 * (mode_rate - prate / div))) + (10 * (prate / div_low - mode_rate) < + (mode_rate - prate / div))) /* * At least 10 times better when using a higher * frequency than requested, instead of a lower. @@ -147,6 +151,8 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) ATMEL_HLCDC_VSPSU | ATMEL_HLCDC_VSPHO | ATMEL_HLCDC_GUARDTIME_MASK | ATMEL_HLCDC_MODE_MASK, cfg); + + clk_disable_unprepare(crtc->dc->hlcdc->sys_clk); } static enum drm_mode_status diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index a48a4c21b1b3..c5e9e2305fff 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2694,6 +2694,7 @@ static bool drm_dp_get_vc_payload_bw(int dp_link_bw, int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool mst_state) { int ret = 0; + int i = 0; struct drm_dp_mst_branch *mstb = NULL; mutex_lock(&mgr->lock); @@ -2754,10 +2755,21 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms /* this can fail if the device is gone */ drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, 0); ret = 0; + mutex_lock(&mgr->payload_lock); memset(mgr->payloads, 0, mgr->max_payloads * sizeof(struct drm_dp_payload)); mgr->payload_mask = 0; set_bit(0, &mgr->payload_mask); + for (i = 0; i < mgr->max_payloads; i++) { + struct drm_dp_vcpi *vcpi = mgr->proposed_vcpis[i]; + + if (vcpi) { + vcpi->vcpi = 0; + vcpi->num_slots = 0; + } + mgr->proposed_vcpis[i] = NULL; + } mgr->vcpi_mask = 0; + mutex_unlock(&mgr->payload_lock); } out_unlock: diff --git a/drivers/gpu/drm/drm_rect.c b/drivers/gpu/drm/drm_rect.c index b8363aaa9032..818738e83d06 100644 --- a/drivers/gpu/drm/drm_rect.c +++ b/drivers/gpu/drm/drm_rect.c @@ -54,7 +54,12 @@ EXPORT_SYMBOL(drm_rect_intersect); static u32 clip_scaled(u32 src, u32 dst, u32 clip) { - u64 tmp = mul_u32_u32(src, dst - clip); + u64 tmp; + + if (dst == 0) + return 0; + + tmp = mul_u32_u32(src, dst - clip); /* * Round toward 1.0 when clipping so that we don't accidentally diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c index 772f0753ed38..aaf2f26f8505 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c @@ -121,7 +121,7 @@ static void mdp4_dsi_encoder_enable(struct drm_encoder *encoder) if (mdp4_dsi_encoder->enabled) return; - mdp4_crtc_set_config(encoder->crtc, + mdp4_crtc_set_config(encoder->crtc, MDP4_DMA_CONFIG_PACK_ALIGN_MSB | MDP4_DMA_CONFIG_DEFLKR_EN | MDP4_DMA_CONFIG_DITHER_EN | diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 34bd73526afd..930674117533 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1213,10 +1213,7 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, unsigned int i, j; struct page *pg; - if (num_pages < alloc_unit) - return 0; - - for (i = 0; (i * alloc_unit) < num_pages; i++) { + for (i = 0; i < num_pages / alloc_unit; i++) { if (bl_resp->hdr.size + sizeof(union dm_mem_page_range) > PAGE_SIZE) return i * alloc_unit; @@ -1254,7 +1251,7 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, } - return num_pages; + return i * alloc_unit; } static void balloon_up(struct work_struct *dummy) @@ -1269,9 +1266,6 @@ static void balloon_up(struct work_struct *dummy) long avail_pages; unsigned long floor; - /* The host balloons pages in 2M granularity. */ - WARN_ON_ONCE(num_pages % PAGES_IN_2M != 0); - /* * We will attempt 2M allocations. However, if we fail to * allocate 2M chunks, we will go back to 4k allocations. @@ -1281,14 +1275,13 @@ static void balloon_up(struct work_struct *dummy) avail_pages = si_mem_available(); floor = compute_balloon_floor(); - /* Refuse to balloon below the floor, keep the 2M granularity. */ + /* Refuse to balloon below the floor. */ if (avail_pages < num_pages || avail_pages - num_pages < floor) { pr_warn("Balloon request will be partially fulfilled. %s\n", avail_pages < num_pages ? "Not enough memory." : "Balloon floor reached."); num_pages = avail_pages > floor ? (avail_pages - floor) : 0; - num_pages -= num_pages % PAGES_IN_2M; } while (!done) { diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 163ff7ba92b7..fedf6829cdec 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -632,7 +632,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, while (bcnt > 0) { const size_t gup_num_pages = min_t(size_t, - (bcnt + BIT(page_shift) - 1) >> page_shift, + ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE, PAGE_SIZE / sizeof(struct page *)); down_read(&owning_mm->mmap_sem); diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 4950df3f71b6..5c73c0a790fa 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -507,8 +507,7 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr, ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr); if (ret) { /* Undo the effect of adding the outstanding wr */ - gsi->outstanding_pi = (gsi->outstanding_pi - 1) % - gsi->cap.max_send_wr; + gsi->outstanding_pi--; goto err; } spin_unlock_irqrestore(&gsi->lock, flags); diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index deb924e1d790..3d2b63585da9 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -329,6 +329,9 @@ struct cached_dev { */ atomic_t has_dirty; +#define BCH_CACHE_READA_ALL 0 +#define BCH_CACHE_READA_META_ONLY 1 + unsigned int cache_readahead_policy; struct bch_ratelimit writeback_rate; struct delayed_work writeback_rate_update; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 41adcd1546f1..4045ae748f17 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -391,13 +391,20 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) goto skip; /* - * Flag for bypass if the IO is for read-ahead or background, - * unless the read-ahead request is for metadata + * If the bio is for read-ahead or background IO, bypass it or + * not depends on the following situations, + * - If the IO is for meta data, always cache it and no bypass + * - If the IO is not meta data, check dc->cache_reada_policy, + * BCH_CACHE_READA_ALL: cache it and not bypass + * BCH_CACHE_READA_META_ONLY: not cache it and bypass + * That is, read-ahead request for metadata always get cached * (eg, for gfs2 or xfs). */ - if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) && - !(bio->bi_opf & (REQ_META|REQ_PRIO))) - goto skip; + if ((bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND))) { + if (!(bio->bi_opf & (REQ_META|REQ_PRIO)) && + (dc->cache_readahead_policy != BCH_CACHE_READA_ALL)) + goto skip; + } if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) || bio_sectors(bio) & (c->sb.block_size - 1)) { diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 627dcea0f5b6..7f0fb4b5755a 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -27,6 +27,12 @@ static const char * const bch_cache_modes[] = { NULL }; +static const char * const bch_reada_cache_policies[] = { + "all", + "meta-only", + NULL +}; + /* Default is 0 ("auto") */ static const char * const bch_stop_on_failure_modes[] = { "auto", @@ -100,6 +106,7 @@ rw_attribute(congested_write_threshold_us); rw_attribute(sequential_cutoff); rw_attribute(data_csum); rw_attribute(cache_mode); +rw_attribute(readahead_cache_policy); rw_attribute(stop_when_cache_set_failed); rw_attribute(writeback_metadata); rw_attribute(writeback_running); @@ -167,6 +174,11 @@ SHOW(__bch_cached_dev) bch_cache_modes, BDEV_CACHE_MODE(&dc->sb)); + if (attr == &sysfs_readahead_cache_policy) + return bch_snprint_string_list(buf, PAGE_SIZE, + bch_reada_cache_policies, + dc->cache_readahead_policy); + if (attr == &sysfs_stop_when_cache_set_failed) return bch_snprint_string_list(buf, PAGE_SIZE, bch_stop_on_failure_modes, @@ -352,6 +364,15 @@ STORE(__cached_dev) } } + if (attr == &sysfs_readahead_cache_policy) { + v = __sysfs_match_string(bch_reada_cache_policies, -1, buf); + if (v < 0) + return v; + + if ((unsigned int) v != dc->cache_readahead_policy) + dc->cache_readahead_policy = v; + } + if (attr == &sysfs_stop_when_cache_set_failed) { v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf); if (v < 0) @@ -466,6 +487,7 @@ static struct attribute *bch_cached_dev_files[] = { &sysfs_data_csum, #endif &sysfs_cache_mode, + &sysfs_readahead_cache_policy, &sysfs_stop_when_cache_set_failed, &sysfs_writeback_metadata, &sysfs_writeback_running, diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index eb9782fc93fe..492bbe0584d9 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -331,8 +331,14 @@ static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti, const char *opts) { - unsigned bs = crypto_skcipher_blocksize(any_tfm(cc)); - int log = ilog2(bs); + unsigned bs; + int log; + + if (test_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags)) + bs = crypto_aead_blocksize(any_tfm_aead(cc)); + else + bs = crypto_skcipher_blocksize(any_tfm(cc)); + log = ilog2(bs); /* we need to calculate how far we must shift the sector count * to get the cipher block count, we use this shift in _gen */ @@ -717,7 +723,7 @@ static int crypt_iv_eboiv_gen(struct crypt_config *cc, u8 *iv, struct crypto_wait wait; int err; - req = skcipher_request_alloc(any_tfm(cc), GFP_KERNEL | GFP_NOFS); + req = skcipher_request_alloc(any_tfm(cc), GFP_NOIO); if (!req) return -ENOMEM; diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index b88d6d701f5b..8bb723f1a569 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -387,16 +387,15 @@ static int subtree_equal(void *context, const void *value1_le, const void *value * Variant that is used for in-core only changes or code that * shouldn't put the pool in service on its own (e.g. commit). */ -static inline void __pmd_write_lock(struct dm_pool_metadata *pmd) +static inline void pmd_write_lock_in_core(struct dm_pool_metadata *pmd) __acquires(pmd->root_lock) { down_write(&pmd->root_lock); } -#define pmd_write_lock_in_core(pmd) __pmd_write_lock((pmd)) static inline void pmd_write_lock(struct dm_pool_metadata *pmd) { - __pmd_write_lock(pmd); + pmd_write_lock_in_core(pmd); if (unlikely(!pmd->in_service)) pmd->in_service = true; } @@ -831,6 +830,7 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) * We need to know if the thin_disk_superblock exceeds a 512-byte sector. */ BUILD_BUG_ON(sizeof(struct thin_disk_superblock) > 512); + BUG_ON(!rwsem_is_locked(&pmd->root_lock)); if (unlikely(!pmd->in_service)) return 0; @@ -953,6 +953,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) return -EBUSY; } + pmd_write_lock_in_core(pmd); if (!dm_bm_is_read_only(pmd->bm) && !pmd->fail_io) { r = __commit_transaction(pmd); if (r < 0) @@ -961,6 +962,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) } if (!pmd->fail_io) __destroy_persistent_data_objects(pmd); + pmd_write_unlock(pmd); kfree(pmd); return 0; @@ -1841,7 +1843,7 @@ int dm_pool_commit_metadata(struct dm_pool_metadata *pmd) * Care is taken to not have commit be what * triggers putting the thin-pool in-service. */ - __pmd_write_lock(pmd); + pmd_write_lock_in_core(pmd); if (pmd->fail_io) goto out; diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 43d1af1d8173..07c1b0334f57 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -442,7 +442,13 @@ static void writecache_notify_io(unsigned long error, void *context) complete(&endio->c); } -static void ssd_commit_flushed(struct dm_writecache *wc) +static void writecache_wait_for_ios(struct dm_writecache *wc, int direction) +{ + wait_event(wc->bio_in_progress_wait[direction], + !atomic_read(&wc->bio_in_progress[direction])); +} + +static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) { struct dm_io_region region; struct dm_io_request req; @@ -488,17 +494,20 @@ static void ssd_commit_flushed(struct dm_writecache *wc) writecache_notify_io(0, &endio); wait_for_completion_io(&endio.c); + if (wait_for_ios) + writecache_wait_for_ios(wc, WRITE); + writecache_disk_flush(wc, wc->ssd_dev); memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size); } -static void writecache_commit_flushed(struct dm_writecache *wc) +static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) { if (WC_MODE_PMEM(wc)) wmb(); else - ssd_commit_flushed(wc); + ssd_commit_flushed(wc, wait_for_ios); } static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) @@ -522,12 +531,6 @@ static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) writecache_error(wc, r, "error flushing metadata: %d", r); } -static void writecache_wait_for_ios(struct dm_writecache *wc, int direction) -{ - wait_event(wc->bio_in_progress_wait[direction], - !atomic_read(&wc->bio_in_progress[direction])); -} - #define WFE_RETURN_FOLLOWING 1 #define WFE_LOWEST_SEQ 2 @@ -724,15 +727,12 @@ static void writecache_flush(struct dm_writecache *wc) e = e2; cond_resched(); } - writecache_commit_flushed(wc); - - if (!WC_MODE_PMEM(wc)) - writecache_wait_for_ios(wc, WRITE); + writecache_commit_flushed(wc, true); wc->seq_count++; pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count)); writecache_flush_region(wc, &sb(wc)->seq_count, sizeof sb(wc)->seq_count); - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); wc->overwrote_committed = false; @@ -756,7 +756,7 @@ static void writecache_flush(struct dm_writecache *wc) } if (need_flush_after_free) - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); } static void writecache_flush_work(struct work_struct *work) @@ -809,7 +809,7 @@ static void writecache_discard(struct dm_writecache *wc, sector_t start, sector_ } if (discarded_something) - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); } static bool writecache_wait_for_writeback(struct dm_writecache *wc) @@ -958,7 +958,7 @@ erase_this: if (need_flush) { writecache_flush_all_metadata(wc); - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); } wc_unlock(wc); @@ -1342,7 +1342,7 @@ static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head * wc->writeback_size--; n_walked++; if (unlikely(n_walked >= ENDIO_LATENCY)) { - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); wc_unlock(wc); wc_lock(wc); n_walked = 0; @@ -1423,7 +1423,7 @@ pop_from_list: writecache_wait_for_ios(wc, READ); } - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); wc_unlock(wc); } @@ -1766,10 +1766,10 @@ static int init_memory(struct dm_writecache *wc) write_original_sector_seq_count(wc, &wc->entries[b], -1, -1); writecache_flush_all_metadata(wc); - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC)); writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic); - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); return 0; } diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index ac1179ca80d9..5205cf9bbfd9 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -134,6 +134,7 @@ struct dmz_metadata { sector_t zone_bitmap_size; unsigned int zone_nr_bitmap_blocks; + unsigned int zone_bits_per_mblk; unsigned int nr_bitmap_blocks; unsigned int nr_map_blocks; @@ -1167,7 +1168,10 @@ static int dmz_init_zones(struct dmz_metadata *zmd) /* Init */ zmd->zone_bitmap_size = dev->zone_nr_blocks >> 3; - zmd->zone_nr_bitmap_blocks = zmd->zone_bitmap_size >> DMZ_BLOCK_SHIFT; + zmd->zone_nr_bitmap_blocks = + max_t(sector_t, 1, zmd->zone_bitmap_size >> DMZ_BLOCK_SHIFT); + zmd->zone_bits_per_mblk = min_t(sector_t, dev->zone_nr_blocks, + DMZ_BLOCK_SIZE_BITS); /* Allocate zone array */ zmd->zones = kcalloc(dev->nr_zones, sizeof(struct dm_zone), GFP_KERNEL); @@ -1991,7 +1995,7 @@ int dmz_copy_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone, dmz_release_mblock(zmd, to_mblk); dmz_release_mblock(zmd, from_mblk); - chunk_block += DMZ_BLOCK_SIZE_BITS; + chunk_block += zmd->zone_bits_per_mblk; } to_zone->weight = from_zone->weight; @@ -2052,7 +2056,7 @@ int dmz_validate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, /* Set bits */ bit = chunk_block & DMZ_BLOCK_MASK_BITS; - nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); + nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit); count = dmz_set_bits((unsigned long *)mblk->data, bit, nr_bits); if (count) { @@ -2131,7 +2135,7 @@ int dmz_invalidate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, /* Clear bits */ bit = chunk_block & DMZ_BLOCK_MASK_BITS; - nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); + nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit); count = dmz_clear_bits((unsigned long *)mblk->data, bit, nr_bits); @@ -2191,6 +2195,7 @@ static int dmz_to_next_set_block(struct dmz_metadata *zmd, struct dm_zone *zone, { struct dmz_mblock *mblk; unsigned int bit, set_bit, nr_bits; + unsigned int zone_bits = zmd->zone_bits_per_mblk; unsigned long *bitmap; int n = 0; @@ -2205,15 +2210,15 @@ static int dmz_to_next_set_block(struct dmz_metadata *zmd, struct dm_zone *zone, /* Get offset */ bitmap = (unsigned long *) mblk->data; bit = chunk_block & DMZ_BLOCK_MASK_BITS; - nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); + nr_bits = min(nr_blocks, zone_bits - bit); if (set) - set_bit = find_next_bit(bitmap, DMZ_BLOCK_SIZE_BITS, bit); + set_bit = find_next_bit(bitmap, zone_bits, bit); else - set_bit = find_next_zero_bit(bitmap, DMZ_BLOCK_SIZE_BITS, bit); + set_bit = find_next_zero_bit(bitmap, zone_bits, bit); dmz_release_mblock(zmd, mblk); n += set_bit - bit; - if (set_bit < DMZ_BLOCK_SIZE_BITS) + if (set_bit < zone_bits) break; nr_blocks -= nr_bits; @@ -2316,7 +2321,7 @@ static void dmz_get_zone_weight(struct dmz_metadata *zmd, struct dm_zone *zone) /* Count bits in this block */ bitmap = mblk->data; bit = chunk_block & DMZ_BLOCK_MASK_BITS; - nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); + nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit); n += dmz_count_bits(bitmap, bit, nr_bits); dmz_release_mblock(zmd, mblk); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1a5e328c443a..6d3cc235f842 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1880,6 +1880,7 @@ static void dm_init_normal_md_queue(struct mapped_device *md) /* * Initialize aspects of queue that aren't relevant for blk-mq */ + md->queue->backing_dev_info->congested_data = md; md->queue->backing_dev_info->congested_fn = dm_any_congested; } @@ -1970,7 +1971,12 @@ static struct mapped_device *alloc_dev(int minor) if (!md->queue) goto bad; md->queue->queuedata = md; - md->queue->backing_dev_info->congested_data = md; + /* + * default to bio-based required ->make_request_fn until DM + * table is loaded and md->type established. If request-based + * table is loaded: blk-mq will override accordingly. + */ + blk_queue_make_request(md->queue, dm_make_request); md->disk = alloc_disk_node(1, md->numa_node_id); if (!md->disk) @@ -2285,7 +2291,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) case DM_TYPE_DAX_BIO_BASED: case DM_TYPE_NVME_BIO_BASED: dm_init_normal_md_queue(md); - blk_queue_make_request(md->queue, dm_make_request); break; case DM_TYPE_NONE: WARN_ON_ONCE(true); diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index bd68f6fef694..d8b4125e338c 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -380,6 +380,33 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, return -ENOSPC; } +int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, + dm_block_t begin, dm_block_t end, dm_block_t *b) +{ + int r; + uint32_t count; + + do { + r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b); + if (r) + break; + + /* double check this block wasn't used in the old transaction */ + if (*b >= old_ll->nr_blocks) + count = 0; + else { + r = sm_ll_lookup(old_ll, *b, &count); + if (r) + break; + + if (count) + begin = *b + 1; + } + } while (count); + + return r; +} + static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b, int (*mutator)(void *context, uint32_t old, uint32_t *new), void *context, enum allocation_event *ev) diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h index b3078d5eda0c..8de63ce39bdd 100644 --- a/drivers/md/persistent-data/dm-space-map-common.h +++ b/drivers/md/persistent-data/dm-space-map-common.h @@ -109,6 +109,8 @@ int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result); int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result); int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, dm_block_t end, dm_block_t *result); +int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, + dm_block_t begin, dm_block_t end, dm_block_t *result); int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, enum allocation_event *ev); int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev); int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev); diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index 32adf6b4a9c7..bf4c5e2ccb6f 100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c @@ -167,8 +167,10 @@ static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) enum allocation_event ev; struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - /* FIXME: we should loop round a couple of times */ - r = sm_ll_find_free_block(&smd->old_ll, smd->begin, smd->old_ll.nr_blocks, b); + /* + * Any block we allocate has to be free in both the old and current ll. + */ + r = sm_ll_find_common_free_block(&smd->old_ll, &smd->ll, smd->begin, smd->ll.nr_blocks, b); if (r) return r; diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 25328582cc48..9e3c64ec2026 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -448,7 +448,10 @@ static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b) enum allocation_event ev; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b); + /* + * Any block we allocate has to be free in both the old and current ll. + */ + r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, smm->begin, smm->ll.nr_blocks, b); if (r) return r; diff --git a/drivers/media/rc/iguanair.c b/drivers/media/rc/iguanair.c index 872d6441e512..a7deca1fefb7 100644 --- a/drivers/media/rc/iguanair.c +++ b/drivers/media/rc/iguanair.c @@ -413,7 +413,7 @@ static int iguanair_probe(struct usb_interface *intf, int ret, pipein, pipeout; struct usb_host_interface *idesc; - idesc = intf->altsetting; + idesc = intf->cur_altsetting; if (idesc->desc.bNumEndpoints < 2) return -ENODEV; diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index 7741151606ef..6f80c251f641 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -1891,23 +1891,28 @@ int rc_register_device(struct rc_dev *dev) dev->registered = true; - if (dev->driver_type != RC_DRIVER_IR_RAW_TX) { - rc = rc_setup_rx_device(dev); - if (rc) - goto out_dev; - } - - /* Ensure that the lirc kfifo is setup before we start the thread */ + /* + * once the the input device is registered in rc_setup_rx_device, + * userspace can open the input device and rc_open() will be called + * as a result. This results in driver code being allowed to submit + * keycodes with rc_keydown, so lirc must be registered first. + */ if (dev->allowed_protocols != RC_PROTO_BIT_CEC) { rc = ir_lirc_register(dev); if (rc < 0) - goto out_rx; + goto out_dev; + } + + if (dev->driver_type != RC_DRIVER_IR_RAW_TX) { + rc = rc_setup_rx_device(dev); + if (rc) + goto out_lirc; } if (dev->driver_type == RC_DRIVER_IR_RAW) { rc = ir_raw_event_register(dev); if (rc < 0) - goto out_lirc; + goto out_rx; } dev_dbg(&dev->dev, "Registered rc%u (driver: %s)\n", dev->minor, @@ -1915,11 +1920,11 @@ int rc_register_device(struct rc_dev *dev) return 0; +out_rx: + rc_free_rx_device(dev); out_lirc: if (dev->allowed_protocols != RC_PROTO_BIT_CEC) ir_lirc_unregister(dev); -out_rx: - rc_free_rx_device(dev); out_dev: device_del(&dev->dev); out_rx_free: diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 428235ca2635..2b688cc39bb8 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -1493,6 +1493,11 @@ static int uvc_scan_chain_forward(struct uvc_video_chain *chain, break; if (forward == prev) continue; + if (forward->chain.next || forward->chain.prev) { + uvc_trace(UVC_TRACE_DESCR, "Found reference to " + "entity %d already in chain.\n", forward->id); + return -EINVAL; + } switch (UVC_ENTITY_TYPE(forward)) { case UVC_VC_EXTENSION_UNIT: @@ -1574,6 +1579,13 @@ static int uvc_scan_chain_backward(struct uvc_video_chain *chain, return -1; } + if (term->chain.next || term->chain.prev) { + uvc_trace(UVC_TRACE_DESCR, "Found reference to " + "entity %d already in chain.\n", + term->id); + return -EINVAL; + } + if (uvc_trace_param & UVC_TRACE_PROBE) printk(KERN_CONT " %d", term->id); diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index e1eaf1135c7f..7ad6db8dd9f6 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -1183,36 +1183,38 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar u32 aux_space; int compatible_arg = 1; long err = 0; + unsigned int ncmd; /* * 1. When struct size is different, converts the command. */ switch (cmd) { - case VIDIOC_G_FMT32: cmd = VIDIOC_G_FMT; break; - case VIDIOC_S_FMT32: cmd = VIDIOC_S_FMT; break; - case VIDIOC_QUERYBUF32: cmd = VIDIOC_QUERYBUF; break; - case VIDIOC_G_FBUF32: cmd = VIDIOC_G_FBUF; break; - case VIDIOC_S_FBUF32: cmd = VIDIOC_S_FBUF; break; - case VIDIOC_QBUF32: cmd = VIDIOC_QBUF; break; - case VIDIOC_DQBUF32: cmd = VIDIOC_DQBUF; break; - case VIDIOC_ENUMSTD32: cmd = VIDIOC_ENUMSTD; break; - case VIDIOC_ENUMINPUT32: cmd = VIDIOC_ENUMINPUT; break; - case VIDIOC_TRY_FMT32: cmd = VIDIOC_TRY_FMT; break; - case VIDIOC_G_EXT_CTRLS32: cmd = VIDIOC_G_EXT_CTRLS; break; - case VIDIOC_S_EXT_CTRLS32: cmd = VIDIOC_S_EXT_CTRLS; break; - case VIDIOC_TRY_EXT_CTRLS32: cmd = VIDIOC_TRY_EXT_CTRLS; break; - case VIDIOC_DQEVENT32: cmd = VIDIOC_DQEVENT; break; - case VIDIOC_OVERLAY32: cmd = VIDIOC_OVERLAY; break; - case VIDIOC_STREAMON32: cmd = VIDIOC_STREAMON; break; - case VIDIOC_STREAMOFF32: cmd = VIDIOC_STREAMOFF; break; - case VIDIOC_G_INPUT32: cmd = VIDIOC_G_INPUT; break; - case VIDIOC_S_INPUT32: cmd = VIDIOC_S_INPUT; break; - case VIDIOC_G_OUTPUT32: cmd = VIDIOC_G_OUTPUT; break; - case VIDIOC_S_OUTPUT32: cmd = VIDIOC_S_OUTPUT; break; - case VIDIOC_CREATE_BUFS32: cmd = VIDIOC_CREATE_BUFS; break; - case VIDIOC_PREPARE_BUF32: cmd = VIDIOC_PREPARE_BUF; break; - case VIDIOC_G_EDID32: cmd = VIDIOC_G_EDID; break; - case VIDIOC_S_EDID32: cmd = VIDIOC_S_EDID; break; + case VIDIOC_G_FMT32: ncmd = VIDIOC_G_FMT; break; + case VIDIOC_S_FMT32: ncmd = VIDIOC_S_FMT; break; + case VIDIOC_QUERYBUF32: ncmd = VIDIOC_QUERYBUF; break; + case VIDIOC_G_FBUF32: ncmd = VIDIOC_G_FBUF; break; + case VIDIOC_S_FBUF32: ncmd = VIDIOC_S_FBUF; break; + case VIDIOC_QBUF32: ncmd = VIDIOC_QBUF; break; + case VIDIOC_DQBUF32: ncmd = VIDIOC_DQBUF; break; + case VIDIOC_ENUMSTD32: ncmd = VIDIOC_ENUMSTD; break; + case VIDIOC_ENUMINPUT32: ncmd = VIDIOC_ENUMINPUT; break; + case VIDIOC_TRY_FMT32: ncmd = VIDIOC_TRY_FMT; break; + case VIDIOC_G_EXT_CTRLS32: ncmd = VIDIOC_G_EXT_CTRLS; break; + case VIDIOC_S_EXT_CTRLS32: ncmd = VIDIOC_S_EXT_CTRLS; break; + case VIDIOC_TRY_EXT_CTRLS32: ncmd = VIDIOC_TRY_EXT_CTRLS; break; + case VIDIOC_DQEVENT32: ncmd = VIDIOC_DQEVENT; break; + case VIDIOC_OVERLAY32: ncmd = VIDIOC_OVERLAY; break; + case VIDIOC_STREAMON32: ncmd = VIDIOC_STREAMON; break; + case VIDIOC_STREAMOFF32: ncmd = VIDIOC_STREAMOFF; break; + case VIDIOC_G_INPUT32: ncmd = VIDIOC_G_INPUT; break; + case VIDIOC_S_INPUT32: ncmd = VIDIOC_S_INPUT; break; + case VIDIOC_G_OUTPUT32: ncmd = VIDIOC_G_OUTPUT; break; + case VIDIOC_S_OUTPUT32: ncmd = VIDIOC_S_OUTPUT; break; + case VIDIOC_CREATE_BUFS32: ncmd = VIDIOC_CREATE_BUFS; break; + case VIDIOC_PREPARE_BUF32: ncmd = VIDIOC_PREPARE_BUF; break; + case VIDIOC_G_EDID32: ncmd = VIDIOC_G_EDID; break; + case VIDIOC_S_EDID32: ncmd = VIDIOC_S_EDID; break; + default: ncmd = cmd; break; } /* @@ -1221,11 +1223,11 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar * argument into it. */ switch (cmd) { - case VIDIOC_OVERLAY: - case VIDIOC_STREAMON: - case VIDIOC_STREAMOFF: - case VIDIOC_S_INPUT: - case VIDIOC_S_OUTPUT: + case VIDIOC_OVERLAY32: + case VIDIOC_STREAMON32: + case VIDIOC_STREAMOFF32: + case VIDIOC_S_INPUT32: + case VIDIOC_S_OUTPUT32: err = alloc_userspace(sizeof(unsigned int), 0, &new_p64); if (!err && assign_in_user((unsigned int __user *)new_p64, (compat_uint_t __user *)p32)) @@ -1233,23 +1235,23 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar compatible_arg = 0; break; - case VIDIOC_G_INPUT: - case VIDIOC_G_OUTPUT: + case VIDIOC_G_INPUT32: + case VIDIOC_G_OUTPUT32: err = alloc_userspace(sizeof(unsigned int), 0, &new_p64); compatible_arg = 0; break; - case VIDIOC_G_EDID: - case VIDIOC_S_EDID: + case VIDIOC_G_EDID32: + case VIDIOC_S_EDID32: err = alloc_userspace(sizeof(struct v4l2_edid), 0, &new_p64); if (!err) err = get_v4l2_edid32(new_p64, p32); compatible_arg = 0; break; - case VIDIOC_G_FMT: - case VIDIOC_S_FMT: - case VIDIOC_TRY_FMT: + case VIDIOC_G_FMT32: + case VIDIOC_S_FMT32: + case VIDIOC_TRY_FMT32: err = bufsize_v4l2_format(p32, &aux_space); if (!err) err = alloc_userspace(sizeof(struct v4l2_format), @@ -1262,7 +1264,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar compatible_arg = 0; break; - case VIDIOC_CREATE_BUFS: + case VIDIOC_CREATE_BUFS32: err = bufsize_v4l2_create(p32, &aux_space); if (!err) err = alloc_userspace(sizeof(struct v4l2_create_buffers), @@ -1275,10 +1277,10 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar compatible_arg = 0; break; - case VIDIOC_PREPARE_BUF: - case VIDIOC_QUERYBUF: - case VIDIOC_QBUF: - case VIDIOC_DQBUF: + case VIDIOC_PREPARE_BUF32: + case VIDIOC_QUERYBUF32: + case VIDIOC_QBUF32: + case VIDIOC_DQBUF32: err = bufsize_v4l2_buffer(p32, &aux_space); if (!err) err = alloc_userspace(sizeof(struct v4l2_buffer), @@ -1291,7 +1293,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar compatible_arg = 0; break; - case VIDIOC_S_FBUF: + case VIDIOC_S_FBUF32: err = alloc_userspace(sizeof(struct v4l2_framebuffer), 0, &new_p64); if (!err) @@ -1299,13 +1301,13 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar compatible_arg = 0; break; - case VIDIOC_G_FBUF: + case VIDIOC_G_FBUF32: err = alloc_userspace(sizeof(struct v4l2_framebuffer), 0, &new_p64); compatible_arg = 0; break; - case VIDIOC_ENUMSTD: + case VIDIOC_ENUMSTD32: err = alloc_userspace(sizeof(struct v4l2_standard), 0, &new_p64); if (!err) @@ -1313,16 +1315,16 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar compatible_arg = 0; break; - case VIDIOC_ENUMINPUT: + case VIDIOC_ENUMINPUT32: err = alloc_userspace(sizeof(struct v4l2_input), 0, &new_p64); if (!err) err = get_v4l2_input32(new_p64, p32); compatible_arg = 0; break; - case VIDIOC_G_EXT_CTRLS: - case VIDIOC_S_EXT_CTRLS: - case VIDIOC_TRY_EXT_CTRLS: + case VIDIOC_G_EXT_CTRLS32: + case VIDIOC_S_EXT_CTRLS32: + case VIDIOC_TRY_EXT_CTRLS32: err = bufsize_v4l2_ext_controls(p32, &aux_space); if (!err) err = alloc_userspace(sizeof(struct v4l2_ext_controls), @@ -1334,7 +1336,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar } compatible_arg = 0; break; - case VIDIOC_DQEVENT: + case VIDIOC_DQEVENT32: err = alloc_userspace(sizeof(struct v4l2_event), 0, &new_p64); compatible_arg = 0; break; @@ -1352,9 +1354,9 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar * Otherwise, it will pass the newly allocated @new_p64 argument. */ if (compatible_arg) - err = native_ioctl(file, cmd, (unsigned long)p32); + err = native_ioctl(file, ncmd, (unsigned long)p32); else - err = native_ioctl(file, cmd, (unsigned long)new_p64); + err = native_ioctl(file, ncmd, (unsigned long)new_p64); if (err == -ENOTTY) return err; @@ -1370,13 +1372,13 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar * the blocks to maximum allowed value. */ switch (cmd) { - case VIDIOC_G_EXT_CTRLS: - case VIDIOC_S_EXT_CTRLS: - case VIDIOC_TRY_EXT_CTRLS: + case VIDIOC_G_EXT_CTRLS32: + case VIDIOC_S_EXT_CTRLS32: + case VIDIOC_TRY_EXT_CTRLS32: if (put_v4l2_ext_controls32(file, new_p64, p32)) err = -EFAULT; break; - case VIDIOC_S_EDID: + case VIDIOC_S_EDID32: if (put_v4l2_edid32(new_p64, p32)) err = -EFAULT; break; @@ -1389,49 +1391,49 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar * the original 32 bits structure. */ switch (cmd) { - case VIDIOC_S_INPUT: - case VIDIOC_S_OUTPUT: - case VIDIOC_G_INPUT: - case VIDIOC_G_OUTPUT: + case VIDIOC_S_INPUT32: + case VIDIOC_S_OUTPUT32: + case VIDIOC_G_INPUT32: + case VIDIOC_G_OUTPUT32: if (assign_in_user((compat_uint_t __user *)p32, ((unsigned int __user *)new_p64))) err = -EFAULT; break; - case VIDIOC_G_FBUF: + case VIDIOC_G_FBUF32: err = put_v4l2_framebuffer32(new_p64, p32); break; - case VIDIOC_DQEVENT: + case VIDIOC_DQEVENT32: err = put_v4l2_event32(new_p64, p32); break; - case VIDIOC_G_EDID: + case VIDIOC_G_EDID32: err = put_v4l2_edid32(new_p64, p32); break; - case VIDIOC_G_FMT: - case VIDIOC_S_FMT: - case VIDIOC_TRY_FMT: + case VIDIOC_G_FMT32: + case VIDIOC_S_FMT32: + case VIDIOC_TRY_FMT32: err = put_v4l2_format32(new_p64, p32); break; - case VIDIOC_CREATE_BUFS: + case VIDIOC_CREATE_BUFS32: err = put_v4l2_create32(new_p64, p32); break; - case VIDIOC_PREPARE_BUF: - case VIDIOC_QUERYBUF: - case VIDIOC_QBUF: - case VIDIOC_DQBUF: + case VIDIOC_PREPARE_BUF32: + case VIDIOC_QUERYBUF32: + case VIDIOC_QBUF32: + case VIDIOC_DQBUF32: err = put_v4l2_buffer32(new_p64, p32); break; - case VIDIOC_ENUMSTD: + case VIDIOC_ENUMSTD32: err = put_v4l2_standard32(new_p64, p32); break; - case VIDIOC_ENUMINPUT: + case VIDIOC_ENUMINPUT32: err = put_v4l2_input32(new_p64, p32); break; } diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index 66a6c6c236a7..28262190c3ab 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -349,8 +349,11 @@ int videobuf_dma_free(struct videobuf_dmabuf *dma) BUG_ON(dma->sglen); if (dma->pages) { - for (i = 0; i < dma->nr_pages; i++) + for (i = 0; i < dma->nr_pages; i++) { + if (dma->direction == DMA_FROM_DEVICE) + set_page_dirty_lock(dma->pages[i]); put_page(dma->pages[i]); + } kfree(dma->pages); dma->pages = NULL; } diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c index a4aaadaa0cb0..aa59496e4376 100644 --- a/drivers/mfd/axp20x.c +++ b/drivers/mfd/axp20x.c @@ -126,7 +126,7 @@ static const struct regmap_range axp288_writeable_ranges[] = { static const struct regmap_range axp288_volatile_ranges[] = { regmap_reg_range(AXP20X_PWR_INPUT_STATUS, AXP288_POWER_REASON), regmap_reg_range(AXP288_BC_GLOBAL, AXP288_BC_GLOBAL), - regmap_reg_range(AXP288_BC_DET_STAT, AXP288_BC_DET_STAT), + regmap_reg_range(AXP288_BC_DET_STAT, AXP20X_VBUS_IPSOUT_MGMT), regmap_reg_range(AXP20X_CHRG_BAK_CTRL, AXP20X_CHRG_BAK_CTRL), regmap_reg_range(AXP20X_IRQ1_EN, AXP20X_IPSOUT_V_HIGH_L), regmap_reg_range(AXP20X_TIMER_CTRL, AXP20X_TIMER_CTRL), diff --git a/drivers/mfd/da9062-core.c b/drivers/mfd/da9062-core.c index e69626867c26..9143de7b77b8 100644 --- a/drivers/mfd/da9062-core.c +++ b/drivers/mfd/da9062-core.c @@ -248,7 +248,7 @@ static const struct mfd_cell da9062_devs[] = { .name = "da9062-watchdog", .num_resources = ARRAY_SIZE(da9062_wdt_resources), .resources = da9062_wdt_resources, - .of_compatible = "dlg,da9062-wdt", + .of_compatible = "dlg,da9062-watchdog", }, { .name = "da9062-thermal", diff --git a/drivers/mfd/dln2.c b/drivers/mfd/dln2.c index 381593fbe50f..7841c11411d0 100644 --- a/drivers/mfd/dln2.c +++ b/drivers/mfd/dln2.c @@ -722,6 +722,8 @@ static int dln2_probe(struct usb_interface *interface, const struct usb_device_id *usb_id) { struct usb_host_interface *hostif = interface->cur_altsetting; + struct usb_endpoint_descriptor *epin; + struct usb_endpoint_descriptor *epout; struct device *dev = &interface->dev; struct dln2_dev *dln2; int ret; @@ -731,12 +733,19 @@ static int dln2_probe(struct usb_interface *interface, hostif->desc.bNumEndpoints < 2) return -ENODEV; + epin = &hostif->endpoint[0].desc; + epout = &hostif->endpoint[1].desc; + if (!usb_endpoint_is_bulk_out(epout)) + return -ENODEV; + if (!usb_endpoint_is_bulk_in(epin)) + return -ENODEV; + dln2 = kzalloc(sizeof(*dln2), GFP_KERNEL); if (!dln2) return -ENOMEM; - dln2->ep_out = hostif->endpoint[0].desc.bEndpointAddress; - dln2->ep_in = hostif->endpoint[1].desc.bEndpointAddress; + dln2->ep_out = epout->bEndpointAddress; + dln2->ep_in = epin->bEndpointAddress; dln2->usb_dev = usb_get_dev(interface_to_usbdev(interface)); dln2->interface = interface; usb_set_intfdata(interface, dln2); diff --git a/drivers/mfd/rn5t618.c b/drivers/mfd/rn5t618.c index da5cd9c92a59..ead2e79036a9 100644 --- a/drivers/mfd/rn5t618.c +++ b/drivers/mfd/rn5t618.c @@ -26,6 +26,7 @@ static bool rn5t618_volatile_reg(struct device *dev, unsigned int reg) case RN5T618_WATCHDOGCNT: case RN5T618_DCIRQ: case RN5T618_ILIMDATAH ... RN5T618_AIN0DATAL: + case RN5T618_ADCCNT3: case RN5T618_IR_ADC1 ... RN5T618_IR_ADC3: case RN5T618_IR_GPR: case RN5T618_IR_GPF: diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 66e354d51ee9..7083d8ddd495 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1134,17 +1134,22 @@ static void mmc_spi_initsequence(struct mmc_spi_host *host) * SPI protocol. Another is that when chipselect is released while * the card returns BUSY status, the clock must issue several cycles * with chipselect high before the card will stop driving its output. + * + * SPI_CS_HIGH means "asserted" here. In some cases like when using + * GPIOs for chip select, SPI_CS_HIGH is set but this will be logically + * inverted by gpiolib, so if we want to ascertain to drive it high + * we should toggle the default with an XOR as we do here. */ - host->spi->mode |= SPI_CS_HIGH; + host->spi->mode ^= SPI_CS_HIGH; if (spi_setup(host->spi) != 0) { /* Just warn; most cards work without it. */ dev_warn(&host->spi->dev, "can't change chip-select polarity\n"); - host->spi->mode &= ~SPI_CS_HIGH; + host->spi->mode ^= SPI_CS_HIGH; } else { mmc_spi_readbytes(host, 18); - host->spi->mode &= ~SPI_CS_HIGH; + host->spi->mode ^= SPI_CS_HIGH; if (spi_setup(host->spi) != 0) { /* Wot, we can't get the same setup we had before? */ dev_err(&host->spi->dev, diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index 0ae986c42bc8..9378d5dc86c8 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -324,19 +324,22 @@ static int sdhci_at91_probe(struct platform_device *pdev) priv->mainck = devm_clk_get(&pdev->dev, "baseclk"); if (IS_ERR(priv->mainck)) { dev_err(&pdev->dev, "failed to get baseclk\n"); - return PTR_ERR(priv->mainck); + ret = PTR_ERR(priv->mainck); + goto sdhci_pltfm_free; } priv->hclock = devm_clk_get(&pdev->dev, "hclock"); if (IS_ERR(priv->hclock)) { dev_err(&pdev->dev, "failed to get hclock\n"); - return PTR_ERR(priv->hclock); + ret = PTR_ERR(priv->hclock); + goto sdhci_pltfm_free; } priv->gck = devm_clk_get(&pdev->dev, "multclk"); if (IS_ERR(priv->gck)) { dev_err(&pdev->dev, "failed to get multclk\n"); - return PTR_ERR(priv->gck); + ret = PTR_ERR(priv->gck); + goto sdhci_pltfm_free; } ret = sdhci_at91_set_clks_presets(&pdev->dev); diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index c9ea365c248c..5091e2c1c0e5 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -1604,7 +1604,7 @@ static u32 sdhci_read_present_state(struct sdhci_host *host) return sdhci_readl(host, SDHCI_PRESENT_STATE); } -void amd_sdhci_reset(struct sdhci_host *host, u8 mask) +static void amd_sdhci_reset(struct sdhci_host *host, u8 mask) { struct sdhci_pci_slot *slot = sdhci_priv(host); struct pci_dev *pdev = slot->chip->pdev; diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 309c808351ac..f417fb680cd8 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -2310,15 +2310,16 @@ static const struct flash_info spi_nor_ids[] = { { "n25q256a", INFO(0x20ba19, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, { "n25q256ax1", INFO(0x20bb19, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_QUAD_READ) }, { "n25q512ax3", INFO(0x20ba20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) }, + { "mt25qu512a", INFO6(0x20bb20, 0x104400, 64 * 1024, 1024, + SECT_4K | USE_FSR | SPI_NOR_DUAL_READ | + SPI_NOR_QUAD_READ | SPI_NOR_4B_OPCODES) }, + { "n25q512a", INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K | + SPI_NOR_QUAD_READ) }, { "n25q00", INFO(0x20ba21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) }, { "n25q00a", INFO(0x20bb21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) }, { "mt25ql02g", INFO(0x20ba22, 0, 64 * 1024, 4096, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) }, - { "mt25qu512a (n25q512a)", INFO(0x20bb20, 0, 64 * 1024, 1024, - SECT_4K | USE_FSR | SPI_NOR_DUAL_READ | - SPI_NOR_QUAD_READ | - SPI_NOR_4B_OPCODES) }, { "mt25qu02g", INFO(0x20bb22, 0, 64 * 1024, 4096, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) }, /* Micron */ diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 30621c67721a..604772fc4a96 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -64,7 +64,7 @@ static int self_check_seen(struct ubi_device *ubi, unsigned long *seen) return 0; for (pnum = 0; pnum < ubi->peb_count; pnum++) { - if (test_bit(pnum, seen) && ubi->lookuptbl[pnum]) { + if (!test_bit(pnum, seen) && ubi->lookuptbl[pnum]) { ubi_err(ubi, "self-check failed for PEB %d, fastmap didn't see it", pnum); ret = -EINVAL; } @@ -1137,7 +1137,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, struct rb_node *tmp_rb; int ret, i, j, free_peb_count, used_peb_count, vol_count; int scrub_peb_count, erase_peb_count; - unsigned long *seen_pebs = NULL; + unsigned long *seen_pebs; fm_raw = ubi->fm_buf; memset(ubi->fm_buf, 0, ubi->fm_size); @@ -1151,7 +1151,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, dvbuf = new_fm_vbuf(ubi, UBI_FM_DATA_VOLUME_ID); if (!dvbuf) { ret = -ENOMEM; - goto out_kfree; + goto out_free_avbuf; } avhdr = ubi_get_vid_hdr(avbuf); @@ -1160,7 +1160,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, seen_pebs = init_seen(ubi); if (IS_ERR(seen_pebs)) { ret = PTR_ERR(seen_pebs); - goto out_kfree; + goto out_free_dvbuf; } spin_lock(&ubi->volumes_lock); @@ -1328,7 +1328,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, ret = ubi_io_write_vid_hdr(ubi, new_fm->e[0]->pnum, avbuf); if (ret) { ubi_err(ubi, "unable to write vid_hdr to fastmap SB!"); - goto out_kfree; + goto out_free_seen; } for (i = 0; i < new_fm->used_blocks; i++) { @@ -1350,7 +1350,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, if (ret) { ubi_err(ubi, "unable to write vid_hdr to PEB %i!", new_fm->e[i]->pnum); - goto out_kfree; + goto out_free_seen; } } @@ -1360,7 +1360,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, if (ret) { ubi_err(ubi, "unable to write fastmap to PEB %i!", new_fm->e[i]->pnum); - goto out_kfree; + goto out_free_seen; } } @@ -1370,10 +1370,13 @@ static int ubi_write_fastmap(struct ubi_device *ubi, ret = self_check_seen(ubi, seen_pebs); dbg_bld("fastmap written!"); -out_kfree: - ubi_free_vid_buf(avbuf); - ubi_free_vid_buf(dvbuf); +out_free_seen: free_seen(seen_pebs); +out_free_dvbuf: + ubi_free_vid_buf(dvbuf); +out_free_avbuf: + ubi_free_vid_buf(avbuf); + out: return ret; } diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 4f2e6910c623..1cc2cd894f87 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1383,26 +1383,31 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) bool do_tx_balance = true; u32 hash_index = 0; const u8 *hash_start = NULL; - struct ipv6hdr *ip6hdr; skb_reset_mac_header(skb); eth_data = eth_hdr(skb); switch (ntohs(skb->protocol)) { case ETH_P_IP: { - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; if (is_broadcast_ether_addr(eth_data->h_dest) || - iph->daddr == ip_bcast || - iph->protocol == IPPROTO_IGMP) { + !pskb_network_may_pull(skb, sizeof(*iph))) { + do_tx_balance = false; + break; + } + iph = ip_hdr(skb); + if (iph->daddr == ip_bcast || iph->protocol == IPPROTO_IGMP) { do_tx_balance = false; break; } hash_start = (char *)&(iph->daddr); hash_size = sizeof(iph->daddr); - } break; - case ETH_P_IPV6: + } + case ETH_P_IPV6: { + const struct ipv6hdr *ip6hdr; + /* IPv6 doesn't really use broadcast mac address, but leave * that here just in case. */ @@ -1419,7 +1424,11 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) break; } - /* Additianally, DAD probes should not be tx-balanced as that + if (!pskb_network_may_pull(skb, sizeof(*ip6hdr))) { + do_tx_balance = false; + break; + } + /* Additionally, DAD probes should not be tx-balanced as that * will lead to false positives for duplicate addresses and * prevent address configuration from working. */ @@ -1429,17 +1438,26 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) break; } - hash_start = (char *)&(ipv6_hdr(skb)->daddr); - hash_size = sizeof(ipv6_hdr(skb)->daddr); + hash_start = (char *)&ip6hdr->daddr; + hash_size = sizeof(ip6hdr->daddr); break; - case ETH_P_IPX: - if (ipx_hdr(skb)->ipx_checksum != IPX_NO_CHECKSUM) { + } + case ETH_P_IPX: { + const struct ipxhdr *ipxhdr; + + if (pskb_network_may_pull(skb, sizeof(*ipxhdr))) { + do_tx_balance = false; + break; + } + ipxhdr = (struct ipxhdr *)skb_network_header(skb); + + if (ipxhdr->ipx_checksum != IPX_NO_CHECKSUM) { /* something is wrong with this packet */ do_tx_balance = false; break; } - if (ipx_hdr(skb)->ipx_type != IPX_TYPE_NCP) { + if (ipxhdr->ipx_type != IPX_TYPE_NCP) { /* The only protocol worth balancing in * this family since it has an "ARP" like * mechanism @@ -1448,9 +1466,11 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) break; } + eth_data = eth_hdr(skb); hash_start = (char *)eth_data->h_dest; hash_size = ETH_ALEN; break; + } case ETH_P_ARP: do_tx_balance = false; if (bond_info->rlb_enabled) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index a7132c1593c3..7ed667b304d1 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -680,7 +680,7 @@ int b53_configure_vlan(struct dsa_switch *ds) b53_do_vlan_op(dev, VTA_CMD_CLEAR); } - b53_enable_vlan(dev, false, ds->vlan_filtering); + b53_enable_vlan(dev, dev->vlan_enabled, ds->vlan_filtering); b53_for_each_port(dev, i) b53_write16(dev, B53_VLAN_PAGE, diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 47b21096b577..fecd5e674e04 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -68,7 +68,9 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) /* Force link status for IMP port */ reg = core_readl(priv, offset); - reg |= (MII_SW_OR | LINK_STS | GMII_SPEED_UP_2G); + reg |= (MII_SW_OR | LINK_STS); + if (priv->type == BCM7278_DEVICE_ID) + reg |= GMII_SPEED_UP_2G; core_writel(priv, reg, offset); /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index c5f64959a184..1142768969c2 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -101,6 +101,12 @@ static struct spi_driver ksz9477_spi_driver = { module_spi_driver(ksz9477_spi_driver); +MODULE_ALIAS("spi:ksz9477"); +MODULE_ALIAS("spi:ksz9897"); +MODULE_ALIAS("spi:ksz9893"); +MODULE_ALIAS("spi:ksz9563"); +MODULE_ALIAS("spi:ksz8563"); +MODULE_ALIAS("spi:ksz9567"); MODULE_AUTHOR("Woojung Huh "); MODULE_DESCRIPTION("Microchip KSZ9477 Series Switch SPI access Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index b4c664957266..4a27577e137b 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2728,6 +2728,9 @@ static int __maybe_unused bcm_sysport_resume(struct device *d) umac_reset(priv); + /* Disable the UniMAC RX/TX */ + umac_enable_set(priv, CMD_RX_EN | CMD_TX_EN, 0); + /* We may have been suspended and never received a WOL event that * would turn off MPD detection, take care of that now */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index cf292f7c3d3c..41297533b4a8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7873,7 +7873,7 @@ static void bnxt_setup_msix(struct bnxt *bp) int tcs, i; tcs = netdev_get_num_tc(dev); - if (tcs > 1) { + if (tcs) { int i, off, count; for (i = 0; i < tcs; i++) { @@ -9273,10 +9273,6 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bnxt_debug_dev_exit(bp); bnxt_disable_napi(bp); del_timer_sync(&bp->timer); - if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state) && - pci_is_enabled(bp->pdev)) - pci_disable_device(bp->pdev); - bnxt_free_skbs(bp); /* Save ring stats before shutdown */ @@ -10052,8 +10048,15 @@ static void bnxt_fw_reset_close(struct bnxt *bp) { __bnxt_close_nic(bp, true, false); bnxt_ulp_irq_stop(bp); + /* When firmware is fatal state, disable PCI device to prevent + * any potential bad DMAs before freeing kernel memory. + */ + if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) + pci_disable_device(bp->pdev); bnxt_clear_int_mode(bp); bnxt_hwrm_func_drv_unrgtr(bp); + if (pci_is_enabled(bp->pdev)) + pci_disable_device(bp->pdev); bnxt_free_ctx_mem(bp); kfree(bp->ctx); bp->ctx = NULL; @@ -11359,9 +11362,9 @@ static void bnxt_remove_one(struct pci_dev *pdev) bnxt_sriov_disable(bp); bnxt_dl_fw_reporters_destroy(bp, true); - bnxt_dl_unregister(bp); pci_disable_pcie_error_reporting(pdev); unregister_netdev(dev); + bnxt_dl_unregister(bp); bnxt_shutdown_tc(bp); bnxt_cancel_sp_work(bp); bp->sp_event = 0; @@ -11850,11 +11853,14 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bnxt_init_tc(bp); } + bnxt_dl_register(bp); + rc = register_netdev(dev); if (rc) - goto init_err_cleanup_tc; + goto init_err_cleanup; - bnxt_dl_register(bp); + if (BNXT_PF(bp)) + devlink_port_type_eth_set(&bp->dl_port, bp->dev); bnxt_dl_fw_reporters_create(bp); netdev_info(dev, "%s found at mem %lx, node addr %pM\n", @@ -11864,7 +11870,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; -init_err_cleanup_tc: +init_err_cleanup: + bnxt_dl_unregister(bp); bnxt_shutdown_tc(bp); bnxt_clear_int_mode(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 1e236e74ff2f..2d817ba0602c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -482,7 +482,6 @@ int bnxt_dl_register(struct bnxt *bp) netdev_err(bp->dev, "devlink_port_register failed"); goto err_dl_param_unreg; } - devlink_port_type_eth_set(&bp->dl_port, bp->dev); rc = devlink_port_params_register(&bp->dl_port, bnxt_dl_port_params, ARRAY_SIZE(bnxt_dl_port_params)); diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index f496b248bda3..95a94507cec1 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -73,7 +73,11 @@ struct sifive_fu540_macb_mgmt { /* Max length of transmit frame must be a multiple of 8 bytes */ #define MACB_TX_LEN_ALIGN 8 #define MACB_MAX_TX_LEN ((unsigned int)((1 << MACB_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1))) -#define GEM_MAX_TX_LEN ((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1))) +/* Limit maximum TX length as per Cadence TSO errata. This is to avoid a + * false amba_error in TX path from the DMA assuming there is not enough + * space in the SRAM (16KB) even when there is. + */ +#define GEM_MAX_TX_LEN (unsigned int)(0x3FC0) #define GEM_MTU_MIN_SIZE ETH_MIN_MTU #define MACB_NETIF_LSO NETIF_F_TSO @@ -1664,16 +1668,14 @@ static netdev_features_t macb_features_check(struct sk_buff *skb, /* Validate LSO compatibility */ - /* there is only one buffer */ - if (!skb_is_nonlinear(skb)) + /* there is only one buffer or protocol is not UDP */ + if (!skb_is_nonlinear(skb) || (ip_hdr(skb)->protocol != IPPROTO_UDP)) return features; /* length of header */ hdrlen = skb_transport_offset(skb); - if (ip_hdr(skb)->protocol == IPPROTO_TCP) - hdrlen += tcp_hdrlen(skb); - /* For LSO: + /* For UFO only: * When software supplies two or more payload buffers all payload buffers * apart from the last must be a multiple of 8 bytes in size. */ diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index 0efdbd1a4a6f..32d470d4122a 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -2214,15 +2214,16 @@ static int __init dmfe_init_module(void) if (cr6set) dmfe_cr6_user_set = cr6set; - switch(mode) { - case DMFE_10MHF: + switch (mode) { + case DMFE_10MHF: case DMFE_100MHF: case DMFE_10MFD: case DMFE_100MFD: case DMFE_1M_HPNA: dmfe_media_mode = mode; break; - default:dmfe_media_mode = DMFE_AUTO; + default: + dmfe_media_mode = DMFE_AUTO; break; } diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c index b1f30b194300..117ffe08800d 100644 --- a/drivers/net/ethernet/dec/tulip/uli526x.c +++ b/drivers/net/ethernet/dec/tulip/uli526x.c @@ -1809,8 +1809,8 @@ static int __init uli526x_init_module(void) if (cr6set) uli526x_cr6_user_set = cr6set; - switch (mode) { - case ULI526X_10MHF: + switch (mode) { + case ULI526X_10MHF: case ULI526X_100MHF: case ULI526X_10MFD: case ULI526X_100MFD: diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index fcbe01f61aa4..e130233b5085 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2483,6 +2483,9 @@ static void dpaa_adjust_link(struct net_device *net_dev) mac_dev->adjust_link(mac_dev); } +/* The Aquantia PHYs are capable of performing rate adaptation */ +#define PHY_VEND_AQUANTIA 0x03a1b400 + static int dpaa_phy_init(struct net_device *net_dev) { __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; @@ -2501,9 +2504,14 @@ static int dpaa_phy_init(struct net_device *net_dev) return -ENODEV; } - /* Remove any features not supported by the controller */ - ethtool_convert_legacy_u32_to_link_mode(mask, mac_dev->if_support); - linkmode_and(phy_dev->supported, phy_dev->supported, mask); + /* Unless the PHY is capable of rate adaptation */ + if (mac_dev->phy_if != PHY_INTERFACE_MODE_XGMII || + ((phy_dev->drv->phy_id & GENMASK(31, 10)) != PHY_VEND_AQUANTIA)) { + /* remove any features not supported by the controller */ + ethtool_convert_legacy_u32_to_link_mode(mask, + mac_dev->if_support); + linkmode_and(phy_dev->supported, phy_dev->supported, mask); + } phy_support_asym_pause(phy_dev); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index e49820675c8c..6b1a81df1465 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -388,6 +388,8 @@ struct mvneta_pcpu_stats { struct u64_stats_sync syncp; u64 rx_packets; u64 rx_bytes; + u64 rx_dropped; + u64 rx_errors; u64 tx_packets; u64 tx_bytes; }; @@ -706,6 +708,8 @@ mvneta_get_stats64(struct net_device *dev, struct mvneta_pcpu_stats *cpu_stats; u64 rx_packets; u64 rx_bytes; + u64 rx_dropped; + u64 rx_errors; u64 tx_packets; u64 tx_bytes; @@ -714,19 +718,20 @@ mvneta_get_stats64(struct net_device *dev, start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); rx_packets = cpu_stats->rx_packets; rx_bytes = cpu_stats->rx_bytes; + rx_dropped = cpu_stats->rx_dropped; + rx_errors = cpu_stats->rx_errors; tx_packets = cpu_stats->tx_packets; tx_bytes = cpu_stats->tx_bytes; } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); stats->rx_packets += rx_packets; stats->rx_bytes += rx_bytes; + stats->rx_dropped += rx_dropped; + stats->rx_errors += rx_errors; stats->tx_packets += tx_packets; stats->tx_bytes += tx_bytes; } - stats->rx_errors = dev->stats.rx_errors; - stats->rx_dropped = dev->stats.rx_dropped; - stats->tx_dropped = dev->stats.tx_dropped; } @@ -1703,8 +1708,14 @@ static u32 mvneta_txq_desc_csum(int l3_offs, int l3_proto, static void mvneta_rx_error(struct mvneta_port *pp, struct mvneta_rx_desc *rx_desc) { + struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); u32 status = rx_desc->status; + /* update per-cpu counter */ + u64_stats_update_begin(&stats->syncp); + stats->rx_errors++; + u64_stats_update_end(&stats->syncp); + switch (status & MVNETA_RXD_ERR_CODE_MASK) { case MVNETA_RXD_ERR_CRC: netdev_err(pp->dev, "bad rx status %08x (crc error), size=%d\n", @@ -1965,7 +1976,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi, /* Check errors only for FIRST descriptor */ if (rx_status & MVNETA_RXD_ERR_SUMMARY) { mvneta_rx_error(pp, rx_desc); - dev->stats.rx_errors++; /* leave the descriptor untouched */ continue; } @@ -1976,11 +1986,17 @@ static int mvneta_rx_swbm(struct napi_struct *napi, skb_size = max(rx_copybreak, rx_header_size); rxq->skb = netdev_alloc_skb_ip_align(dev, skb_size); if (unlikely(!rxq->skb)) { + struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); + netdev_err(dev, "Can't allocate skb on queue %d\n", rxq->id); - dev->stats.rx_dropped++; + rxq->skb_alloc_err++; + + u64_stats_update_begin(&stats->syncp); + stats->rx_dropped++; + u64_stats_update_end(&stats->syncp); continue; } copy_size = min(skb_size, rx_bytes); @@ -2137,7 +2153,6 @@ err_drop_frame_ret_pool: mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool, rx_desc->buf_phys_addr); err_drop_frame: - dev->stats.rx_errors++; mvneta_rx_error(pp, rx_desc); /* leave the descriptor untouched */ continue; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h index d787bc0a4155..e09bc3858d57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h @@ -45,7 +45,7 @@ void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id); static inline bool mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) { - if (!MLX5_CAP_GEN(mdev, tls)) + if (!MLX5_CAP_GEN(mdev, tls_tx)) return false; if (!MLX5_CAP_GEN(mdev, log_max_dek)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c index 71384ad1a443..ef1ed15a53b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c @@ -269,7 +269,7 @@ struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev, int datalen; u32 skb_seq; - if (MLX5_CAP_GEN(sq->channel->mdev, tls)) { + if (MLX5_CAP_GEN(sq->channel->mdev, tls_tx)) { skb = mlx5e_ktls_handle_tx_skb(netdev, sq, skb, wqe, pi); goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index c76da309506b..72232e570af7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -850,6 +850,7 @@ void mlx5_fpga_ipsec_delete_sa_ctx(void *context) mutex_lock(&fpga_xfrm->lock); if (!--fpga_xfrm->num_rules) { mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx); + kfree(fpga_xfrm->sa_ctx); fpga_xfrm->sa_ctx = NULL; } mutex_unlock(&fpga_xfrm->lock); @@ -1478,7 +1479,7 @@ int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs))) return 0; - if (!mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) { + if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) { mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 791e14ac26f4..86e6bbb57482 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1555,16 +1555,16 @@ struct match_list_head { struct match_list first; }; -static void free_match_list(struct match_list_head *head) +static void free_match_list(struct match_list_head *head, bool ft_locked) { if (!list_empty(&head->list)) { struct match_list *iter, *match_tmp; list_del(&head->first.list); - tree_put_node(&head->first.g->node, false); + tree_put_node(&head->first.g->node, ft_locked); list_for_each_entry_safe(iter, match_tmp, &head->list, list) { - tree_put_node(&iter->g->node, false); + tree_put_node(&iter->g->node, ft_locked); list_del(&iter->list); kfree(iter); } @@ -1573,7 +1573,8 @@ static void free_match_list(struct match_list_head *head) static int build_match_list(struct match_list_head *match_head, struct mlx5_flow_table *ft, - const struct mlx5_flow_spec *spec) + const struct mlx5_flow_spec *spec, + bool ft_locked) { struct rhlist_head *tmp, *list; struct mlx5_flow_group *g; @@ -1598,7 +1599,7 @@ static int build_match_list(struct match_list_head *match_head, curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); if (!curr_match) { - free_match_list(match_head); + free_match_list(match_head, ft_locked); err = -ENOMEM; goto out; } @@ -1778,7 +1779,7 @@ search_again_locked: version = atomic_read(&ft->node.version); /* Collect all fgs which has a matching match_criteria */ - err = build_match_list(&match_head, ft, spec); + err = build_match_list(&match_head, ft, spec, take_write); if (err) { if (take_write) up_write_ref_node(&ft->node, false); @@ -1792,7 +1793,7 @@ search_again_locked: rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest, dest_num, version); - free_match_list(&match_head); + free_match_list(&match_head, take_write); if (!IS_ERR(rule) || (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) { if (take_write) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index a19790dee7b2..13e86f0b42f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -239,7 +239,7 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } - if (MLX5_CAP_GEN(dev, tls)) { + if (MLX5_CAP_GEN(dev, tls_tx)) { err = mlx5_core_get_caps(dev, MLX5_CAP_TLS); if (err) return err; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index 5bfdda19f64d..d8745f87f065 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -862,7 +862,7 @@ struct ionic_rxq_comp { #define IONIC_RXQ_COMP_CSUM_F_VLAN 0x40 #define IONIC_RXQ_COMP_CSUM_F_CALC 0x80 u8 pkt_type_color; -#define IONIC_RXQ_COMP_PKT_TYPE_MASK 0x0f +#define IONIC_RXQ_COMP_PKT_TYPE_MASK 0x7f }; enum ionic_pkt_type { diff --git a/drivers/net/ethernet/qlogic/qed/qed_ptp.c b/drivers/net/ethernet/qlogic/qed/qed_ptp.c index 0dacf2c18c09..3e613058e225 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ptp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ptp.c @@ -44,8 +44,8 @@ /* Add/subtract the Adjustment_Value when making a Drift adjustment */ #define QED_DRIFT_CNTR_DIRECTION_SHIFT 31 #define QED_TIMESTAMP_MASK BIT(16) -/* Param mask for Hardware to detect/timestamp the unicast PTP packets */ -#define QED_PTP_UCAST_PARAM_MASK 0xF +/* Param mask for Hardware to detect/timestamp the L2/L4 unicast PTP packets */ +#define QED_PTP_UCAST_PARAM_MASK 0x70F static enum qed_resc_lock qed_ptcdev_to_resc(struct qed_hwfn *p_hwfn) { diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 8d88e4083456..7b65e79d6ae9 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -936,7 +936,7 @@ static void smc911x_phy_configure(struct work_struct *work) if (lp->ctl_rspeed != 100) my_ad_caps &= ~(ADVERTISE_100BASE4|ADVERTISE_100FULL|ADVERTISE_100HALF); - if (!lp->ctl_rfduplx) + if (!lp->ctl_rfduplx) my_ad_caps &= ~(ADVERTISE_100FULL|ADVERTISE_10FULL); /* Update our Auto-Neg Advertisement Register */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 7ec895407d23..e0a5fe83d8e0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -413,6 +413,7 @@ static int ethqos_configure(struct qcom_ethqos *ethqos) dll_lock = rgmii_readl(ethqos, SDC4_STATUS); if (dll_lock & SDC4_STATUS_DLL_LOCK) break; + retry--; } while (retry > 0); if (!retry) dev_err(ðqos->pdev->dev, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 06dd65c419c4..582176d869c3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4763,6 +4763,7 @@ int stmmac_suspend(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct stmmac_priv *priv = netdev_priv(ndev); + u32 chan; if (!ndev || !netif_running(ndev)) return 0; @@ -4776,6 +4777,9 @@ int stmmac_suspend(struct device *dev) stmmac_disable_all_queues(priv); + for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) + del_timer_sync(&priv->tx_queue[chan].txtimer); + /* Stop TX/RX DMA */ stmmac_stop_all_dma(priv); diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 9b3ba98726d7..3a53d222bfcc 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -767,12 +767,12 @@ static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize) int i; gtp->addr_hash = kmalloc_array(hsize, sizeof(struct hlist_head), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (gtp->addr_hash == NULL) return -ENOMEM; gtp->tid_hash = kmalloc_array(hsize, sizeof(struct hlist_head), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (gtp->tid_hash == NULL) goto err1; diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 44c2d857a7fa..91b302f0192f 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -73,7 +73,7 @@ static const struct file_operations nsim_dev_take_snapshot_fops = { static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) { - char dev_ddir_name[16]; + char dev_ddir_name[sizeof(DRV_NAME) + 10]; sprintf(dev_ddir_name, DRV_NAME "%u", nsim_dev->nsim_bus_dev->dev.id); nsim_dev->ddir = debugfs_create_dir(dev_ddir_name, nsim_dev_ddir); diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index a7b9cf3269bf..29a0917a81e6 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -874,15 +874,15 @@ ppp_async_input(struct asyncppp *ap, const unsigned char *buf, skb = dev_alloc_skb(ap->mru + PPP_HDRLEN + 2); if (!skb) goto nomem; - ap->rpkt = skb; - } - if (skb->len == 0) { - /* Try to get the payload 4-byte aligned. - * This should match the - * PPP_ALLSTATIONS/PPP_UI/compressed tests in - * process_input_packet, but we do not have - * enough chars here to test buf[1] and buf[2]. - */ + ap->rpkt = skb; + } + if (skb->len == 0) { + /* Try to get the payload 4-byte aligned. + * This should match the + * PPP_ALLSTATIONS/PPP_UI/compressed tests in + * process_input_packet, but we do not have + * enough chars here to test buf[1] and buf[2]. + */ if (buf[0] != PPP_ALLSTATIONS) skb_reserve(skb, 2 + (buf[0] & 1)); } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index 7cdfde9b3dea..575ed19e9195 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -430,6 +430,7 @@ fail: usb_free_urb(req->urb); list_del(q->next); } + kfree(reqs); return NULL; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index b3768d5d852a..8ad2d889179c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -3321,6 +3321,10 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm, igtk_cmd.sta_id = cpu_to_le32(sta_id); if (remove_key) { + /* This is a valid situation for IGTK */ + if (sta_id == IWL_MVM_INVALID_STA) + return 0; + igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_NOT_VALID); } else { struct ieee80211_key_seq seq; @@ -3575,9 +3579,9 @@ int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, IWL_DEBUG_WEP(mvm, "mvm remove dynamic key: idx=%d sta=%d\n", keyconf->keyidx, sta_id); - if (mvm_sta && (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC || - keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || - keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256)) + if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) return iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, true); if (!__test_and_clear_bit(keyconf->hw_key_idx, mvm->fw_key_table)) { diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c index 6dd835f1efc2..fbfa0b15d0c8 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c @@ -232,6 +232,7 @@ static int mwifiex_process_country_ie(struct mwifiex_private *priv, if (country_ie_len > (IEEE80211_COUNTRY_STRING_LEN + MWIFIEX_MAX_TRIPLET_802_11D)) { + rcu_read_unlock(); mwifiex_dbg(priv->adapter, ERROR, "11D: country_ie_len overflow!, deauth AP\n"); return -EINVAL; diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c index cda996f6954e..2b83156efe3f 100644 --- a/drivers/nfc/pn544/pn544.c +++ b/drivers/nfc/pn544/pn544.c @@ -693,7 +693,7 @@ static int pn544_hci_check_presence(struct nfc_hci_dev *hdev, target->nfcid1_len != 10) return -EOPNOTSUPP; - return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE, + return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE, PN544_RF_READER_CMD_ACTIVATE_NEXT, target->nfcid1, target->nfcid1_len, NULL); } else if (target->supported_protocols & (NFC_PROTO_JEWEL_MASK | diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index d16b55ffe79f..4e9004fe5c6f 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -105,6 +105,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) u16 qid = le16_to_cpu(c->qid); u16 sqsize = le16_to_cpu(c->sqsize); struct nvmet_ctrl *old; + u16 ret; old = cmpxchg(&req->sq->ctrl, NULL, ctrl); if (old) { @@ -115,7 +116,8 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) if (!sqsize) { pr_warn("queue size zero!\n"); req->error_loc = offsetof(struct nvmf_connect_command, sqsize); - return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; + ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; + goto err; } /* note: convert queue size from 0's-based value to 1's-based value */ @@ -128,16 +130,19 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) } if (ctrl->ops->install_queue) { - u16 ret = ctrl->ops->install_queue(req->sq); - + ret = ctrl->ops->install_queue(req->sq); if (ret) { pr_err("failed to install queue %d cntlid %d ret %x\n", - qid, ret, ctrl->cntlid); - return ret; + qid, ctrl->cntlid, ret); + goto err; } } return 0; + +err: + req->sq->ctrl = NULL; + return ret; } static void nvmet_execute_admin_connect(struct nvmet_req *req) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 057d1ff87d5d..960542dea5ad 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -110,7 +110,7 @@ static void nvmem_cell_drop(struct nvmem_cell *cell) list_del(&cell->node); mutex_unlock(&nvmem_mutex); of_node_put(cell->np); - kfree(cell->name); + kfree_const(cell->name); kfree(cell); } @@ -137,7 +137,9 @@ static int nvmem_cell_info_to_nvmem_cell(struct nvmem_device *nvmem, cell->nvmem = nvmem; cell->offset = info->offset; cell->bytes = info->bytes; - cell->name = info->name; + cell->name = kstrdup_const(info->name, GFP_KERNEL); + if (!cell->name) + return -ENOMEM; cell->bit_offset = info->bit_offset; cell->nbits = info->nbits; @@ -327,7 +329,7 @@ static int nvmem_add_cells_from_of(struct nvmem_device *nvmem) dev_err(dev, "cell %s unaligned to nvmem stride %d\n", cell->name, nvmem->stride); /* Cells already added will be freed later. */ - kfree(cell->name); + kfree_const(cell->name); kfree(cell); return -EINVAL; } diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index 37c2ccbefecd..d91618641be6 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -103,4 +103,8 @@ config OF_OVERLAY config OF_NUMA bool +config OF_DMA_DEFAULT_COHERENT + # arches should select this if DMA is coherent by default for OF devices + bool + endif # OF diff --git a/drivers/of/address.c b/drivers/of/address.c index 978427a9d5e6..8f74c4626e0e 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -998,12 +998,16 @@ EXPORT_SYMBOL_GPL(of_dma_get_range); * @np: device node * * It returns true if "dma-coherent" property was found - * for this device in DT. + * for this device in the DT, or if DMA is coherent by + * default for OF devices on the current platform. */ bool of_dma_is_coherent(struct device_node *np) { struct device_node *node = of_node_get(np); + if (IS_ENABLED(CONFIG_OF_DMA_DEFAULT_COHERENT)) + return true; + while (node) { if (of_property_read_bool(node, "dma-coherent")) { of_node_put(node); diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c index af677254a072..c8c702c494a2 100644 --- a/drivers/pci/controller/dwc/pci-keystone.c +++ b/drivers/pci/controller/dwc/pci-keystone.c @@ -422,7 +422,7 @@ static void ks_pcie_setup_rc_app_regs(struct keystone_pcie *ks_pcie) lower_32_bits(start) | OB_ENABLEN); ks_pcie_app_writel(ks_pcie, OB_OFFSET_HI(i), upper_32_bits(start)); - start += OB_WIN_SIZE; + start += OB_WIN_SIZE * SZ_1M; } val = ks_pcie_app_readl(ks_pcie, CMD_STATUS); @@ -510,7 +510,7 @@ static void ks_pcie_stop_link(struct dw_pcie *pci) /* Disable Link training */ val = ks_pcie_app_readl(ks_pcie, CMD_STATUS); val &= ~LTSSM_EN_VAL; - ks_pcie_app_writel(ks_pcie, CMD_STATUS, LTSSM_EN_VAL | val); + ks_pcie_app_writel(ks_pcie, CMD_STATUS, val); } static int ks_pcie_start_link(struct dw_pcie *pci) @@ -1354,7 +1354,7 @@ static int __init ks_pcie_probe(struct platform_device *pdev) ret = of_property_read_u32(np, "num-viewport", &num_viewport); if (ret < 0) { dev_err(dev, "unable to read *num-viewport* property\n"); - return ret; + goto err_get_sync; } /* diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c index 673a1725ef38..090b632965e2 100644 --- a/drivers/pci/controller/pci-tegra.c +++ b/drivers/pci/controller/pci-tegra.c @@ -2798,7 +2798,7 @@ static int tegra_pcie_probe(struct platform_device *pdev) pm_runtime_enable(pcie->dev); err = pm_runtime_get_sync(pcie->dev); - if (err) { + if (err < 0) { dev_err(dev, "fail to enable pcie controller: %d\n", err); goto teardown_msi; } diff --git a/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c b/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c index 42bc5150dd92..febe0aef68d4 100644 --- a/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c +++ b/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c @@ -80,7 +80,7 @@ static int read_poll_timeout(void __iomem *addr, u32 mask) if (readl_relaxed(addr) & mask) return 0; - usleep_range(DELAY_INTERVAL_US, DELAY_INTERVAL_US + 50); + usleep_range(DELAY_INTERVAL_US, DELAY_INTERVAL_US + 50); } while (!time_after(jiffies, timeout)); return (readl_relaxed(addr) & mask) ? 0 : -ETIMEDOUT; diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index cdab916fbf92..e330ec73c465 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -67,26 +67,22 @@ struct intel_scu_ipc_pdata_t { u32 i2c_base; u32 i2c_len; - u8 irq_mode; }; static const struct intel_scu_ipc_pdata_t intel_scu_ipc_lincroft_pdata = { .i2c_base = 0xff12b000, .i2c_len = 0x10, - .irq_mode = 0, }; /* Penwell and Cloverview */ static const struct intel_scu_ipc_pdata_t intel_scu_ipc_penwell_pdata = { .i2c_base = 0xff12b000, .i2c_len = 0x10, - .irq_mode = 1, }; static const struct intel_scu_ipc_pdata_t intel_scu_ipc_tangier_pdata = { .i2c_base = 0xff00d000, .i2c_len = 0x10, - .irq_mode = 0, }; struct intel_scu_ipc_dev { @@ -99,6 +95,9 @@ struct intel_scu_ipc_dev { static struct intel_scu_ipc_dev ipcdev; /* Only one for now */ +#define IPC_STATUS 0x04 +#define IPC_STATUS_IRQ BIT(2) + /* * IPC Read Buffer (Read Only): * 16 byte buffer for receiving data from SCU, if IPC command @@ -120,11 +119,8 @@ static DEFINE_MUTEX(ipclock); /* lock used to prevent multiple call to SCU */ */ static inline void ipc_command(struct intel_scu_ipc_dev *scu, u32 cmd) { - if (scu->irq_mode) { - reinit_completion(&scu->cmd_complete); - writel(cmd | IPC_IOC, scu->ipc_base); - } - writel(cmd, scu->ipc_base); + reinit_completion(&scu->cmd_complete); + writel(cmd | IPC_IOC, scu->ipc_base); } /* @@ -610,9 +606,10 @@ EXPORT_SYMBOL(intel_scu_ipc_i2c_cntrl); static irqreturn_t ioc(int irq, void *dev_id) { struct intel_scu_ipc_dev *scu = dev_id; + int status = ipc_read_status(scu); - if (scu->irq_mode) - complete(&scu->cmd_complete); + writel(status | IPC_STATUS_IRQ, scu->ipc_base + IPC_STATUS); + complete(&scu->cmd_complete); return IRQ_HANDLED; } @@ -638,8 +635,6 @@ static int ipc_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!pdata) return -ENODEV; - scu->irq_mode = pdata->irq_mode; - err = pcim_enable_device(pdev); if (err) return err; diff --git a/drivers/power/supply/axp20x_ac_power.c b/drivers/power/supply/axp20x_ac_power.c index 0d34a932b6d5..f74b0556bb6b 100644 --- a/drivers/power/supply/axp20x_ac_power.c +++ b/drivers/power/supply/axp20x_ac_power.c @@ -23,6 +23,8 @@ #define AXP20X_PWR_STATUS_ACIN_PRESENT BIT(7) #define AXP20X_PWR_STATUS_ACIN_AVAIL BIT(6) +#define AXP813_ACIN_PATH_SEL BIT(7) + #define AXP813_VHOLD_MASK GENMASK(5, 3) #define AXP813_VHOLD_UV_TO_BIT(x) ((((x) / 100000) - 40) << 3) #define AXP813_VHOLD_REG_TO_UV(x) \ @@ -40,6 +42,7 @@ struct axp20x_ac_power { struct power_supply *supply; struct iio_channel *acin_v; struct iio_channel *acin_i; + bool has_acin_path_sel; }; static irqreturn_t axp20x_ac_power_irq(int irq, void *devid) @@ -86,6 +89,17 @@ static int axp20x_ac_power_get_property(struct power_supply *psy, return ret; val->intval = !!(reg & AXP20X_PWR_STATUS_ACIN_AVAIL); + + /* ACIN_PATH_SEL disables ACIN even if ACIN_AVAIL is set. */ + if (val->intval && power->has_acin_path_sel) { + ret = regmap_read(power->regmap, AXP813_ACIN_PATH_CTRL, + ®); + if (ret) + return ret; + + val->intval = !!(reg & AXP813_ACIN_PATH_SEL); + } + return 0; case POWER_SUPPLY_PROP_VOLTAGE_NOW: @@ -224,21 +238,25 @@ static const struct power_supply_desc axp813_ac_power_desc = { struct axp_data { const struct power_supply_desc *power_desc; bool acin_adc; + bool acin_path_sel; }; static const struct axp_data axp20x_data = { - .power_desc = &axp20x_ac_power_desc, - .acin_adc = true, + .power_desc = &axp20x_ac_power_desc, + .acin_adc = true, + .acin_path_sel = false, }; static const struct axp_data axp22x_data = { - .power_desc = &axp22x_ac_power_desc, - .acin_adc = false, + .power_desc = &axp22x_ac_power_desc, + .acin_adc = false, + .acin_path_sel = false, }; static const struct axp_data axp813_data = { - .power_desc = &axp813_ac_power_desc, - .acin_adc = false, + .power_desc = &axp813_ac_power_desc, + .acin_adc = false, + .acin_path_sel = true, }; static int axp20x_ac_power_probe(struct platform_device *pdev) @@ -282,6 +300,7 @@ static int axp20x_ac_power_probe(struct platform_device *pdev) } power->regmap = dev_get_regmap(pdev->dev.parent, NULL); + power->has_acin_path_sel = axp_data->acin_path_sel; platform_set_drvdata(pdev, power); diff --git a/drivers/power/supply/ltc2941-battery-gauge.c b/drivers/power/supply/ltc2941-battery-gauge.c index da49436176cd..30a9014b2f95 100644 --- a/drivers/power/supply/ltc2941-battery-gauge.c +++ b/drivers/power/supply/ltc2941-battery-gauge.c @@ -449,7 +449,7 @@ static int ltc294x_i2c_remove(struct i2c_client *client) { struct ltc294x_info *info = i2c_get_clientdata(client); - cancel_delayed_work(&info->work); + cancel_delayed_work_sync(&info->work); power_supply_unregister(info->supply); return 0; } diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c index ca3dc3f3bb29..bb16c465426e 100644 --- a/drivers/regulator/helpers.c +++ b/drivers/regulator/helpers.c @@ -13,6 +13,8 @@ #include #include +#include "internal.h" + /** * regulator_is_enabled_regmap - standard is_enabled() for regmap users * @@ -881,3 +883,15 @@ void regulator_bulk_set_supply_names(struct regulator_bulk_data *consumers, consumers[i].supply = supply_names[i]; } EXPORT_SYMBOL_GPL(regulator_bulk_set_supply_names); + +/** + * regulator_is_equal - test whether two regulators are the same + * + * @reg1: first regulator to operate on + * @reg2: second regulator to operate on + */ +bool regulator_is_equal(struct regulator *reg1, struct regulator *reg2) +{ + return reg1->rdev == reg2->rdev; +} +EXPORT_SYMBOL_GPL(regulator_is_equal); diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c index 469d0bc9f5fe..00cf33573136 100644 --- a/drivers/scsi/csiostor/csio_scsi.c +++ b/drivers/scsi/csiostor/csio_scsi.c @@ -1383,7 +1383,7 @@ csio_device_reset(struct device *dev, return -EINVAL; /* Delete NPIV lnodes */ - csio_lnodes_exit(hw, 1); + csio_lnodes_exit(hw, 1); /* Block upper IOs */ csio_lnodes_block_request(hw); diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 42cf38c1ea99..0cbe6740e0c9 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -4392,7 +4392,8 @@ dcmd_timeout_ocr_possible(struct megasas_instance *instance) { if (instance->adapter_type == MFI_SERIES) return KILL_ADAPTER; else if (instance->unload || - test_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags)) + test_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE, + &instance->reset_flags)) return IGNORE_TIMEOUT; else return INITIATE_OCR; diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index e301458bcbae..46bc062d873e 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -4847,6 +4847,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason) if (instance->requestorId && !instance->skip_heartbeat_timer_del) del_timer_sync(&instance->sriov_heartbeat_timer); set_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags); + set_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE, &instance->reset_flags); atomic_set(&instance->adprecovery, MEGASAS_ADPRESET_SM_POLLING); instance->instancet->disable_intr(instance); megasas_sync_irqs((unsigned long)instance); @@ -5046,7 +5047,7 @@ kill_hba: instance->skip_heartbeat_timer_del = 1; retval = FAILED; out: - clear_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags); + clear_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE, &instance->reset_flags); mutex_unlock(&instance->reset_mutex); return retval; } diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.h b/drivers/scsi/megaraid/megaraid_sas_fusion.h index c013c80fe4e6..dd2e37e40d6b 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.h +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h @@ -89,6 +89,7 @@ enum MR_RAID_FLAGS_IO_SUB_TYPE { #define MEGASAS_FP_CMD_LEN 16 #define MEGASAS_FUSION_IN_RESET 0 +#define MEGASAS_FUSION_OCR_NOT_POSSIBLE 1 #define RAID_1_PEER_CMDS 2 #define JBOD_MAPS_COUNT 2 #define MEGASAS_REDUCE_QD_COUNT 64 diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 30afc59c1870..7bbff91f8883 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -2519,12 +2519,6 @@ qla83xx_fw_dump_failed: /* Driver Debug Functions. */ /****************************************************************************/ -static inline int -ql_mask_match(uint level) -{ - return (level & ql2xextended_error_logging) == level; -} - /* * This function is for formatting and logging debug information. * It is to be used when vha is available. It formats the message diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h index bb01b680ce9f..433e95502808 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.h +++ b/drivers/scsi/qla2xxx/qla_dbg.h @@ -374,3 +374,9 @@ extern int qla24xx_dump_ram(struct qla_hw_data *, uint32_t, uint32_t *, extern void qla24xx_pause_risc(struct device_reg_24xx __iomem *, struct qla_hw_data *); extern int qla24xx_soft_reset(struct qla_hw_data *); + +static inline int +ql_mask_match(uint level) +{ + return (level & ql2xextended_error_logging) == level; +} diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 1eb3fe281cc3..c57b95a20688 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2402,6 +2402,7 @@ typedef struct fc_port { unsigned int scan_needed:1; unsigned int n2n_flag:1; unsigned int explicit_logout:1; + unsigned int prli_pend_timer:1; struct completion nvme_del_done; uint32_t nvme_prli_service_param; @@ -2428,6 +2429,7 @@ typedef struct fc_port { struct work_struct free_work; struct work_struct reg_work; uint64_t jiffies_at_registration; + unsigned long prli_expired; struct qlt_plogi_ack_t *plogi_link[QLT_PLOGI_LINK_MAX]; uint16_t tgt_id; @@ -4821,6 +4823,9 @@ struct sff_8247_a0 { ha->current_topology == ISP_CFG_N || \ !ha->current_topology) +#define PRLI_PHASE(_cls) \ + ((_cls == DSC_LS_PRLI_PEND) || (_cls == DSC_LS_PRLI_COMP)) + #include "qla_target.h" #include "qla_gbl.h" #include "qla_dbg.h" diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 9ffaa920fc8f..ac4c47fc5f4c 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -686,7 +686,7 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, port_id_t id; u64 wwn; u16 data[2]; - u8 current_login_state; + u8 current_login_state, nvme_cls; fcport = ea->fcport; ql_dbg(ql_dbg_disc, vha, 0xffff, @@ -745,10 +745,17 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, loop_id = le16_to_cpu(e->nport_handle); loop_id = (loop_id & 0x7fff); - if (fcport->fc4f_nvme) - current_login_state = e->current_login_state >> 4; - else - current_login_state = e->current_login_state & 0xf; + nvme_cls = e->current_login_state >> 4; + current_login_state = e->current_login_state & 0xf; + + if (PRLI_PHASE(nvme_cls)) { + current_login_state = nvme_cls; + fcport->fc4_type &= ~FS_FC4TYPE_FCP; + fcport->fc4_type |= FS_FC4TYPE_NVME; + } else if (PRLI_PHASE(current_login_state)) { + fcport->fc4_type |= FS_FC4TYPE_FCP; + fcport->fc4_type &= ~FS_FC4TYPE_NVME; + } ql_dbg(ql_dbg_disc, vha, 0x20e2, @@ -1219,12 +1226,19 @@ qla24xx_async_prli(struct scsi_qla_host *vha, fc_port_t *fcport) struct srb_iocb *lio; int rval = QLA_FUNCTION_FAILED; - if (!vha->flags.online) + if (!vha->flags.online) { + ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %d %8phC exit\n", + __func__, __LINE__, fcport->port_name); return rval; + } - if (fcport->fw_login_state == DSC_LS_PLOGI_PEND || - fcport->fw_login_state == DSC_LS_PRLI_PEND) + if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND || + fcport->fw_login_state == DSC_LS_PRLI_PEND) && + qla_dual_mode_enabled(vha)) { + ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %d %8phC exit\n", + __func__, __LINE__, fcport->port_name); return rval; + } sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) @@ -1602,6 +1616,10 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) break; default: if (fcport->login_pause) { + ql_dbg(ql_dbg_disc, vha, 0x20d8, + "%s %d %8phC exit\n", + __func__, __LINE__, + fcport->port_name); fcport->last_rscn_gen = fcport->rscn_gen; fcport->last_login_gen = fcport->login_gen; set_bit(RELOGIN_NEEDED, &vha->dpc_flags); diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 7c5f2736ebee..3e9c5768815e 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1897,6 +1897,18 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, inbuf = (uint32_t *)&sts->nvme_ersp_data; outbuf = (uint32_t *)fd->rspaddr; iocb->u.nvme.rsp_pyld_len = le16_to_cpu(sts->nvme_rsp_pyld_len); + if (unlikely(iocb->u.nvme.rsp_pyld_len > + sizeof(struct nvme_fc_ersp_iu))) { + if (ql_mask_match(ql_dbg_io)) { + WARN_ONCE(1, "Unexpected response payload length %u.\n", + iocb->u.nvme.rsp_pyld_len); + ql_log(ql_log_warn, fcport->vha, 0x5100, + "Unexpected response payload length %u.\n", + iocb->u.nvme.rsp_pyld_len); + } + iocb->u.nvme.rsp_pyld_len = + sizeof(struct nvme_fc_ersp_iu); + } iter = iocb->u.nvme.rsp_pyld_len >> 2; for (; iter; iter--) *outbuf++ = swab32(*inbuf++); diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index eac76e934cbe..1ef8907314e5 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -6151,9 +6151,8 @@ qla2x00_dump_mctp_data(scsi_qla_host_t *vha, dma_addr_t req_dma, uint32_t addr, mcp->mb[7] = LSW(MSD(req_dma)); mcp->mb[8] = MSW(addr); /* Setting RAM ID to valid */ - mcp->mb[10] |= BIT_7; /* For MCTP RAM ID is 0x40 */ - mcp->mb[10] |= 0x40; + mcp->mb[10] = BIT_7 | 0x40; mcp->out_mb |= MBX_10|MBX_8|MBX_7|MBX_6|MBX_5|MBX_4|MBX_3|MBX_2|MBX_1| MBX_0; diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index 2b2028f2383e..c855d013ba8a 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -1612,8 +1612,7 @@ qla82xx_get_bootld_offset(struct qla_hw_data *ha) return (u8 *)&ha->hablob->fw->data[offset]; } -static __le32 -qla82xx_get_fw_size(struct qla_hw_data *ha) +static u32 qla82xx_get_fw_size(struct qla_hw_data *ha) { struct qla82xx_uri_data_desc *uri_desc = NULL; @@ -1624,7 +1623,7 @@ qla82xx_get_fw_size(struct qla_hw_data *ha) return cpu_to_le32(uri_desc->size); } - return cpu_to_le32(*(u32 *)&ha->hablob->fw->data[FW_SIZE_OFFSET]); + return get_unaligned_le32(&ha->hablob->fw->data[FW_SIZE_OFFSET]); } static u8 * @@ -1816,7 +1815,7 @@ qla82xx_fw_load_from_blob(struct qla_hw_data *ha) } flashaddr = FLASH_ADDR_START; - size = (__force u32)qla82xx_get_fw_size(ha) / 8; + size = qla82xx_get_fw_size(ha) / 8; ptr64 = (u64 *)qla82xx_get_fw_offs(ha); for (i = 0; i < size; i++) { diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 74a378a91b71..cb8a892e2d39 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1257,6 +1257,7 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess) sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; spin_unlock_irqrestore(&sess->vha->work_lock, flags); + sess->prli_pend_timer = 0; sess->disc_state = DSC_DELETE_PEND; qla24xx_chk_fcp_state(sess); diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 2323432a0edb..5504ab11decc 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -4145,7 +4145,7 @@ static void qla4xxx_mem_free(struct scsi_qla_host *ha) dma_free_coherent(&ha->pdev->dev, ha->queues_len, ha->queues, ha->queues_dma); - if (ha->fw_dump) + if (ha->fw_dump) vfree(ha->fw_dump); ha->queues_len = 0; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 1e38bb967871..0d41a7dc1d6b 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -5023,6 +5023,7 @@ static int ufshcd_disable_auto_bkops(struct ufs_hba *hba) hba->auto_bkops_enabled = false; trace_ufshcd_auto_bkops_state(dev_name(hba->dev), "Disabled"); + hba->is_urgent_bkops_lvl_checked = false; out: return err; } @@ -5047,6 +5048,7 @@ static void ufshcd_force_reset_auto_bkops(struct ufs_hba *hba) hba->ee_ctrl_mask &= ~MASK_EE_URGENT_BKOPS; ufshcd_disable_auto_bkops(hba); } + hba->is_urgent_bkops_lvl_checked = false; } static inline int ufshcd_get_bkops_status(struct ufs_hba *hba, u32 *status) @@ -5093,6 +5095,7 @@ static int ufshcd_bkops_ctrl(struct ufs_hba *hba, err = ufshcd_enable_auto_bkops(hba); else err = ufshcd_disable_auto_bkops(hba); + hba->urgent_bkops_lvl = curr_status; out: return err; } diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 1c8b349379af..77c4a9abe365 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -688,7 +688,9 @@ struct dwc3_ep { #define DWC3_EP_STALL BIT(1) #define DWC3_EP_WEDGE BIT(2) #define DWC3_EP_TRANSFER_STARTED BIT(3) +#define DWC3_EP_END_TRANSFER_PENDING BIT(4) #define DWC3_EP_PENDING_REQUEST BIT(5) +#define DWC3_EP_DELAY_START BIT(6) /* This last one is specific to EP0 */ #define DWC3_EP0_DIR_IN BIT(31) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index fd1b100d2927..6dee4dabc0a4 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -1136,8 +1136,10 @@ void dwc3_ep0_interrupt(struct dwc3 *dwc, case DWC3_DEPEVT_EPCMDCMPLT: cmd = DEPEVT_PARAMETER_CMD(event->parameters); - if (cmd == DWC3_DEPCMD_ENDTRANSFER) + if (cmd == DWC3_DEPCMD_ENDTRANSFER) { + dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING; dep->flags &= ~DWC3_EP_TRANSFER_STARTED; + } break; } } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 154f3f3e8cff..8b95be897078 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1447,6 +1447,12 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req) list_add_tail(&req->list, &dep->pending_list); req->status = DWC3_REQUEST_STATUS_QUEUED; + /* Start the transfer only after the END_TRANSFER is completed */ + if (dep->flags & DWC3_EP_END_TRANSFER_PENDING) { + dep->flags |= DWC3_EP_DELAY_START; + return 0; + } + /* * NOTICE: Isochronous endpoints should NEVER be prestarted. We must * wait for a XferNotReady event so we will know what's the current @@ -2625,8 +2631,14 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc, cmd = DEPEVT_PARAMETER_CMD(event->parameters); if (cmd == DWC3_DEPCMD_ENDTRANSFER) { + dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING; dep->flags &= ~DWC3_EP_TRANSFER_STARTED; dwc3_gadget_ep_cleanup_cancelled_requests(dep); + if ((dep->flags & DWC3_EP_DELAY_START) && + !usb_endpoint_xfer_isoc(dep->endpoint.desc)) + __dwc3_gadget_kick_transfer(dep); + + dep->flags &= ~DWC3_EP_DELAY_START; } break; case DWC3_DEPEVT_STREAMEVT: @@ -2683,7 +2695,8 @@ static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, u32 cmd; int ret; - if (!(dep->flags & DWC3_EP_TRANSFER_STARTED)) + if (!(dep->flags & DWC3_EP_TRANSFER_STARTED) || + (dep->flags & DWC3_EP_END_TRANSFER_PENDING)) return; /* @@ -2728,6 +2741,8 @@ static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, if (!interrupt) dep->flags &= ~DWC3_EP_TRANSFER_STARTED; + else + dep->flags |= DWC3_EP_END_TRANSFER_PENDING; if (dwc3_is_usb31(dwc) || dwc->revision < DWC3_REVISION_310A) udelay(100); diff --git a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c index 460d5d7c984f..7f5cf488b2b1 100644 --- a/drivers/usb/gadget/function/f_ecm.c +++ b/drivers/usb/gadget/function/f_ecm.c @@ -52,6 +52,7 @@ struct f_ecm { struct usb_ep *notify; struct usb_request *notify_req; u8 notify_state; + atomic_t notify_count; bool is_open; /* FIXME is_open needs some irq-ish locking @@ -380,7 +381,7 @@ static void ecm_do_notify(struct f_ecm *ecm) int status; /* notification already in flight? */ - if (!req) + if (atomic_read(&ecm->notify_count)) return; event = req->buf; @@ -420,10 +421,10 @@ static void ecm_do_notify(struct f_ecm *ecm) event->bmRequestType = 0xA1; event->wIndex = cpu_to_le16(ecm->ctrl_id); - ecm->notify_req = NULL; + atomic_inc(&ecm->notify_count); status = usb_ep_queue(ecm->notify, req, GFP_ATOMIC); if (status < 0) { - ecm->notify_req = req; + atomic_dec(&ecm->notify_count); DBG(cdev, "notify --> %d\n", status); } } @@ -448,17 +449,19 @@ static void ecm_notify_complete(struct usb_ep *ep, struct usb_request *req) switch (req->status) { case 0: /* no fault */ + atomic_dec(&ecm->notify_count); break; case -ECONNRESET: case -ESHUTDOWN: + atomic_set(&ecm->notify_count, 0); ecm->notify_state = ECM_NOTIFY_NONE; break; default: DBG(cdev, "event %02x --> %d\n", event->bNotificationType, req->status); + atomic_dec(&ecm->notify_count); break; } - ecm->notify_req = req; ecm_do_notify(ecm); } @@ -907,6 +910,11 @@ static void ecm_unbind(struct usb_configuration *c, struct usb_function *f) usb_free_all_descriptors(f); + if (atomic_read(&ecm->notify_count)) { + usb_ep_dequeue(ecm->notify, ecm->notify_req); + atomic_set(&ecm->notify_count, 0); + } + kfree(ecm->notify_req->buf); usb_ep_free_request(ecm->notify, ecm->notify_req); } diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 59d9d512dcda..ced2581cf99f 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1062,6 +1062,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) req->num_sgs = io_data->sgt.nents; } else { req->buf = data; + req->num_sgs = 0; } req->length = data_len; @@ -1105,6 +1106,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) req->num_sgs = io_data->sgt.nents; } else { req->buf = data; + req->num_sgs = 0; } req->length = data_len; diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index 2d6e76e4cffa..1d900081b1f0 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -53,6 +53,7 @@ struct f_ncm { struct usb_ep *notify; struct usb_request *notify_req; u8 notify_state; + atomic_t notify_count; bool is_open; const struct ndp_parser_opts *parser_opts; @@ -547,7 +548,7 @@ static void ncm_do_notify(struct f_ncm *ncm) int status; /* notification already in flight? */ - if (!req) + if (atomic_read(&ncm->notify_count)) return; event = req->buf; @@ -587,7 +588,8 @@ static void ncm_do_notify(struct f_ncm *ncm) event->bmRequestType = 0xA1; event->wIndex = cpu_to_le16(ncm->ctrl_id); - ncm->notify_req = NULL; + atomic_inc(&ncm->notify_count); + /* * In double buffering if there is a space in FIFO, * completion callback can be called right after the call, @@ -597,7 +599,7 @@ static void ncm_do_notify(struct f_ncm *ncm) status = usb_ep_queue(ncm->notify, req, GFP_ATOMIC); spin_lock(&ncm->lock); if (status < 0) { - ncm->notify_req = req; + atomic_dec(&ncm->notify_count); DBG(cdev, "notify --> %d\n", status); } } @@ -632,17 +634,19 @@ static void ncm_notify_complete(struct usb_ep *ep, struct usb_request *req) case 0: VDBG(cdev, "Notification %02x sent\n", event->bNotificationType); + atomic_dec(&ncm->notify_count); break; case -ECONNRESET: case -ESHUTDOWN: + atomic_set(&ncm->notify_count, 0); ncm->notify_state = NCM_NOTIFY_NONE; break; default: DBG(cdev, "event %02x --> %d\n", event->bNotificationType, req->status); + atomic_dec(&ncm->notify_count); break; } - ncm->notify_req = req; ncm_do_notify(ncm); spin_unlock(&ncm->lock); } @@ -1649,6 +1653,11 @@ static void ncm_unbind(struct usb_configuration *c, struct usb_function *f) ncm_string_defs[0].id = 0; usb_free_all_descriptors(f); + if (atomic_read(&ncm->notify_count)) { + usb_ep_dequeue(ncm->notify, ncm->notify_req); + atomic_set(&ncm->notify_count, 0); + } + kfree(ncm->notify_req->buf); usb_ep_free_request(ncm->notify, ncm->notify_req); } diff --git a/drivers/usb/gadget/legacy/cdc2.c b/drivers/usb/gadget/legacy/cdc2.c index da1c37933ca1..8d7a556ece30 100644 --- a/drivers/usb/gadget/legacy/cdc2.c +++ b/drivers/usb/gadget/legacy/cdc2.c @@ -225,7 +225,7 @@ static struct usb_composite_driver cdc_driver = { .name = "g_cdc", .dev = &device_desc, .strings = dev_strings, - .max_speed = USB_SPEED_HIGH, + .max_speed = USB_SPEED_SUPER, .bind = cdc_bind, .unbind = cdc_unbind, }; diff --git a/drivers/usb/gadget/legacy/g_ffs.c b/drivers/usb/gadget/legacy/g_ffs.c index b640ed3fcf70..ae6d8f7092b8 100644 --- a/drivers/usb/gadget/legacy/g_ffs.c +++ b/drivers/usb/gadget/legacy/g_ffs.c @@ -149,7 +149,7 @@ static struct usb_composite_driver gfs_driver = { .name = DRIVER_NAME, .dev = &gfs_dev_desc, .strings = gfs_dev_strings, - .max_speed = USB_SPEED_HIGH, + .max_speed = USB_SPEED_SUPER, .bind = gfs_bind, .unbind = gfs_unbind, }; diff --git a/drivers/usb/gadget/legacy/multi.c b/drivers/usb/gadget/legacy/multi.c index 50515f9e1022..ec9749845660 100644 --- a/drivers/usb/gadget/legacy/multi.c +++ b/drivers/usb/gadget/legacy/multi.c @@ -482,7 +482,7 @@ static struct usb_composite_driver multi_driver = { .name = "g_multi", .dev = &device_desc, .strings = dev_strings, - .max_speed = USB_SPEED_HIGH, + .max_speed = USB_SPEED_SUPER, .bind = multi_bind, .unbind = multi_unbind, .needs_serial = 1, diff --git a/drivers/usb/gadget/legacy/ncm.c b/drivers/usb/gadget/legacy/ncm.c index 8465f081e921..c61e71ba7045 100644 --- a/drivers/usb/gadget/legacy/ncm.c +++ b/drivers/usb/gadget/legacy/ncm.c @@ -197,7 +197,7 @@ static struct usb_composite_driver ncm_driver = { .name = "g_ncm", .dev = &device_desc, .strings = dev_strings, - .max_speed = USB_SPEED_HIGH, + .max_speed = USB_SPEED_SUPER, .bind = gncm_bind, .unbind = gncm_unbind, }; diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index 8b4ff9fff340..753645bb2527 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -591,6 +591,12 @@ static int tcpci_probe(struct i2c_client *client, static int tcpci_remove(struct i2c_client *client) { struct tcpci_chip *chip = i2c_get_clientdata(client); + int err; + + /* Disable chip interrupts before unregistering port */ + err = tcpci_write16(chip->tcpci, TCPC_ALERT_MASK, 0); + if (err < 0) + return err; tcpci_unregister_port(chip->tcpci); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 9f4117766bb1..c962d9b370c6 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -474,7 +474,9 @@ static int init_vqs(struct virtio_balloon *vb) names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate"; callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack; names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate"; + callbacks[VIRTIO_BALLOON_VQ_STATS] = NULL; names[VIRTIO_BALLOON_VQ_STATS] = NULL; + callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { @@ -898,8 +900,7 @@ static int virtballoon_probe(struct virtio_device *vdev) vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); if (IS_ERR(vb->vb_dev_info.inode)) { err = PTR_ERR(vb->vb_dev_info.inode); - kern_unmount(balloon_mnt); - goto out_del_vqs; + goto out_kern_unmount; } vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; #endif @@ -910,13 +911,13 @@ static int virtballoon_probe(struct virtio_device *vdev) */ if (virtqueue_get_vring_size(vb->free_page_vq) < 2) { err = -ENOSPC; - goto out_del_vqs; + goto out_iput; } vb->balloon_wq = alloc_workqueue("balloon-wq", WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0); if (!vb->balloon_wq) { err = -ENOMEM; - goto out_del_vqs; + goto out_iput; } INIT_WORK(&vb->report_free_page_work, report_free_page_func); vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP; @@ -950,6 +951,12 @@ static int virtballoon_probe(struct virtio_device *vdev) out_del_balloon_wq: if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) destroy_workqueue(vb->balloon_wq); +out_iput: +#ifdef CONFIG_BALLOON_COMPACTION + iput(vb->vb_dev_info.inode); +out_kern_unmount: + kern_unmount(balloon_mnt); +#endif out_del_vqs: vdev->config->del_vqs(vdev); out_free_vb: @@ -965,6 +972,10 @@ static void remove_common(struct virtio_balloon *vb) leak_balloon(vb, vb->num_pages); update_balloon_size(vb); + /* There might be free pages that are being reported: release them. */ + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) + return_free_pages_to_mm(vb, ULONG_MAX); + /* Now we reset the device so we can clean up the queues. */ vb->vdev->config->reset(vb->vdev); diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index f2862f66c2ac..222d630c41fc 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -294,7 +294,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, /* Best option: one for change interrupt, one per vq. */ nvectors = 1; for (i = 0; i < nvqs; ++i) - if (callbacks[i]) + if (names[i] && callbacks[i]) ++nvectors; } else { /* Second best: one for change, shared for all vqs. */ diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c index 21e8085b848b..861daf4f37b2 100644 --- a/drivers/watchdog/watchdog_core.c +++ b/drivers/watchdog/watchdog_core.c @@ -147,6 +147,25 @@ int watchdog_init_timeout(struct watchdog_device *wdd, } EXPORT_SYMBOL_GPL(watchdog_init_timeout); +static int watchdog_reboot_notifier(struct notifier_block *nb, + unsigned long code, void *data) +{ + struct watchdog_device *wdd; + + wdd = container_of(nb, struct watchdog_device, reboot_nb); + if (code == SYS_DOWN || code == SYS_HALT) { + if (watchdog_active(wdd)) { + int ret; + + ret = wdd->ops->stop(wdd); + if (ret) + return NOTIFY_BAD; + } + } + + return NOTIFY_DONE; +} + static int watchdog_restart_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -235,6 +254,19 @@ static int __watchdog_register_device(struct watchdog_device *wdd) } } + if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) { + wdd->reboot_nb.notifier_call = watchdog_reboot_notifier; + + ret = register_reboot_notifier(&wdd->reboot_nb); + if (ret) { + pr_err("watchdog%d: Cannot register reboot notifier (%d)\n", + wdd->id, ret); + watchdog_dev_unregister(wdd); + ida_simple_remove(&watchdog_ida, id); + return ret; + } + } + if (wdd->ops->restart) { wdd->restart_nb.notifier_call = watchdog_restart_notifier; @@ -289,6 +321,9 @@ static void __watchdog_unregister_device(struct watchdog_device *wdd) if (wdd->ops->restart) unregister_restart_handler(&wdd->restart_nb); + if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) + unregister_reboot_notifier(&wdd->reboot_nb); + watchdog_dev_unregister(wdd); ida_simple_remove(&watchdog_ida, wdd->id); } diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index 62483a99105c..ce04edc69e5f 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -38,7 +38,6 @@ #include /* For handling misc devices */ #include /* For module stuff/... */ #include /* For mutexes */ -#include /* For reboot notifier */ #include /* For memory functions */ #include /* For standard types (like size_t) */ #include /* For watchdog specific items */ @@ -1077,25 +1076,6 @@ static void watchdog_cdev_unregister(struct watchdog_device *wdd) put_device(&wd_data->dev); } -static int watchdog_reboot_notifier(struct notifier_block *nb, - unsigned long code, void *data) -{ - struct watchdog_device *wdd; - - wdd = container_of(nb, struct watchdog_device, reboot_nb); - if (code == SYS_DOWN || code == SYS_HALT) { - if (watchdog_active(wdd)) { - int ret; - - ret = wdd->ops->stop(wdd); - if (ret) - return NOTIFY_BAD; - } - } - - return NOTIFY_DONE; -} - /* * watchdog_dev_register: register a watchdog device * @wdd: watchdog device @@ -1114,22 +1094,8 @@ int watchdog_dev_register(struct watchdog_device *wdd) return ret; ret = watchdog_register_pretimeout(wdd); - if (ret) { + if (ret) watchdog_cdev_unregister(wdd); - return ret; - } - - if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) { - wdd->reboot_nb.notifier_call = watchdog_reboot_notifier; - - ret = devm_register_reboot_notifier(&wdd->wd_data->dev, - &wdd->reboot_nb); - if (ret) { - pr_err("watchdog%d: Cannot register reboot notifier (%d)\n", - wdd->id, ret); - watchdog_dev_unregister(wdd); - } - } return ret; } diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index 6d12fc368210..a8d24433c8e9 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -94,7 +94,7 @@ static void watch_target(struct xenbus_watch *watch, "%llu", &static_max) == 1)) static_max >>= PAGE_SHIFT - 10; else - static_max = new_target; + static_max = balloon_stats.current_pages; target_diff = (xen_pv_domain() || xen_initial_domain()) ? 0 : static_max - balloon_stats.target_pages; diff --git a/fs/aio.c b/fs/aio.c index 0d9a559d488c..4115d5ad6b90 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1610,6 +1610,14 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, return 0; } +static void aio_poll_put_work(struct work_struct *work) +{ + struct poll_iocb *req = container_of(work, struct poll_iocb, work); + struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll); + + iocb_put(iocb); +} + static void aio_poll_complete_work(struct work_struct *work) { struct poll_iocb *req = container_of(work, struct poll_iocb, work); @@ -1674,6 +1682,8 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, list_del_init(&req->wait.entry); if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { + struct kioctx *ctx = iocb->ki_ctx; + /* * Try to complete the iocb inline if we can. Use * irqsave/irqrestore because not all filesystems (e.g. fuse) @@ -1683,8 +1693,14 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, list_del(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); req->done = true; - spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags); - iocb_put(iocb); + if (iocb->ki_eventfd && eventfd_signal_count()) { + iocb = NULL; + INIT_WORK(&req->work, aio_poll_put_work); + schedule_work(&req->work); + } + spin_unlock_irqrestore(&ctx->ctx_lock, flags); + if (iocb) + iocb_put(iocb); } else { schedule_work(&req->work); } diff --git a/fs/attr.c b/fs/attr.c index df28035aa23e..b4bbdbd4c8ca 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -183,18 +183,12 @@ void setattr_copy(struct inode *inode, const struct iattr *attr) inode->i_uid = attr->ia_uid; if (ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; - if (ia_valid & ATTR_ATIME) { - inode->i_atime = timestamp_truncate(attr->ia_atime, - inode); - } - if (ia_valid & ATTR_MTIME) { - inode->i_mtime = timestamp_truncate(attr->ia_mtime, - inode); - } - if (ia_valid & ATTR_CTIME) { - inode->i_ctime = timestamp_truncate(attr->ia_ctime, - inode); - } + if (ia_valid & ATTR_ATIME) + inode->i_atime = attr->ia_atime; + if (ia_valid & ATTR_MTIME) + inode->i_mtime = attr->ia_mtime; + if (ia_valid & ATTR_CTIME) + inode->i_ctime = attr->ia_ctime; if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; @@ -268,8 +262,13 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de attr->ia_ctime = now; if (!(ia_valid & ATTR_ATIME_SET)) attr->ia_atime = now; + else + attr->ia_atime = timestamp_truncate(attr->ia_atime, inode); if (!(ia_valid & ATTR_MTIME_SET)) attr->ia_mtime = now; + else + attr->ia_mtime = timestamp_truncate(attr->ia_mtime, inode); + if (ia_valid & ATTR_KILL_PRIV) { error = security_inode_need_killpriv(dentry); if (error < 0) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index da9b0f060a9d..a989105d39c8 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -330,12 +330,10 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem) { write_lock(&fs_info->tree_mod_log_lock); - spin_lock(&fs_info->tree_mod_seq_lock); if (!elem->seq) { elem->seq = btrfs_inc_tree_mod_seq(fs_info); list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); } - spin_unlock(&fs_info->tree_mod_seq_lock); write_unlock(&fs_info->tree_mod_log_lock); return elem->seq; @@ -355,7 +353,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, if (!seq_putting) return; - spin_lock(&fs_info->tree_mod_seq_lock); + write_lock(&fs_info->tree_mod_log_lock); list_del(&elem->list); elem->seq = 0; @@ -366,19 +364,17 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, * blocker with lower sequence number exists, we * cannot remove anything from the log */ - spin_unlock(&fs_info->tree_mod_seq_lock); + write_unlock(&fs_info->tree_mod_log_lock); return; } min_seq = cur_elem->seq; } } - spin_unlock(&fs_info->tree_mod_seq_lock); /* * anything that's lower than the lowest existing (read: blocked) * sequence number can be removed from the tree. */ - write_lock(&fs_info->tree_mod_log_lock); tm_root = &fs_info->tree_mod_log; for (node = rb_first(tm_root); node; node = next) { next = rb_next(node); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5e9f80b28fcf..290ca193c6c0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -671,14 +671,12 @@ struct btrfs_fs_info { atomic_t nr_delayed_iputs; wait_queue_head_t delayed_iputs_wait; - /* this protects tree_mod_seq_list */ - spinlock_t tree_mod_seq_lock; atomic64_t tree_mod_seq; - struct list_head tree_mod_seq_list; - /* this protects tree_mod_log */ + /* this protects tree_mod_log and tree_mod_seq_list */ rwlock_t tree_mod_log_lock; struct rb_root tree_mod_log; + struct list_head tree_mod_seq_list; atomic_t async_delalloc_pages; diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index df3bd880061d..dfdb7d4f8406 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -492,7 +492,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, if (head->is_data) return; - spin_lock(&fs_info->tree_mod_seq_lock); + read_lock(&fs_info->tree_mod_log_lock); if (!list_empty(&fs_info->tree_mod_seq_list)) { struct seq_list *elem; @@ -500,7 +500,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, struct seq_list, list); seq = elem->seq; } - spin_unlock(&fs_info->tree_mod_seq_lock); + read_unlock(&fs_info->tree_mod_log_lock); again: for (node = rb_first_cached(&head->ref_tree); node; @@ -518,7 +518,7 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq) struct seq_list *elem; int ret = 0; - spin_lock(&fs_info->tree_mod_seq_lock); + read_lock(&fs_info->tree_mod_log_lock); if (!list_empty(&fs_info->tree_mod_seq_list)) { elem = list_first_entry(&fs_info->tree_mod_seq_list, struct seq_list, list); @@ -531,7 +531,7 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq) } } - spin_unlock(&fs_info->tree_mod_seq_lock); + read_unlock(&fs_info->tree_mod_log_lock); return ret; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bae334212ee2..7becc5e96f92 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2016,7 +2016,7 @@ static void free_root_extent_buffers(struct btrfs_root *root) } /* helper to cleanup tree roots */ -static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) +static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root) { free_root_extent_buffers(info->tree_root); @@ -2025,7 +2025,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) free_root_extent_buffers(info->csum_root); free_root_extent_buffers(info->quota_root); free_root_extent_buffers(info->uuid_root); - if (chunk_root) + if (free_chunk_root) free_root_extent_buffers(info->chunk_root); free_root_extent_buffers(info->free_space_root); } @@ -2652,7 +2652,6 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->fs_roots_radix_lock); spin_lock_init(&fs_info->delayed_iput_lock); spin_lock_init(&fs_info->defrag_inodes_lock); - spin_lock_init(&fs_info->tree_mod_seq_lock); spin_lock_init(&fs_info->super_lock); spin_lock_init(&fs_info->buffer_lock); spin_lock_init(&fs_info->unused_bgs_lock); @@ -3324,7 +3323,7 @@ fail_block_groups: btrfs_put_block_group_cache(fs_info); fail_tree_roots: - free_root_pointers(fs_info, 1); + free_root_pointers(fs_info, true); invalidate_inode_pages2(fs_info->btree_inode->i_mapping); fail_sb_buffer: @@ -3356,7 +3355,7 @@ recovery_tree_root: if (!btrfs_test_opt(fs_info, USEBACKUPROOT)) goto fail_tree_roots; - free_root_pointers(fs_info, 0); + free_root_pointers(fs_info, false); /* don't use the log in recovery mode, it won't be valid */ btrfs_set_super_log_root(disk_super, 0); @@ -4047,10 +4046,17 @@ void close_ctree(struct btrfs_fs_info *fs_info) invalidate_inode_pages2(fs_info->btree_inode->i_mapping); btrfs_stop_all_workers(fs_info); - btrfs_free_block_groups(fs_info); - clear_bit(BTRFS_FS_OPEN, &fs_info->flags); - free_root_pointers(fs_info, 1); + free_root_pointers(fs_info, true); + + /* + * We must free the block groups after dropping the fs_roots as we could + * have had an IO error and have left over tree log blocks that aren't + * cleaned up until the fs roots are freed. This makes the block group + * accounting appear to be wrong because there's pending reserved bytes, + * so make sure we do the block group cleanup afterwards. + */ + btrfs_free_block_groups(fs_info); iput(fs_info->btree_inode); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 33c6b191ca59..284540cdbbd9 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1583,21 +1583,25 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start, /* Find first extent with bits cleared */ while (1) { node = __etree_search(tree, start, &next, &prev, NULL, NULL); - if (!node) { + if (!node && !next && !prev) { + /* + * Tree is completely empty, send full range and let + * caller deal with it + */ + *start_ret = 0; + *end_ret = -1; + goto out; + } else if (!node && !next) { + /* + * We are past the last allocated chunk, set start at + * the end of the last extent. + */ + state = rb_entry(prev, struct extent_state, rb_node); + *start_ret = state->end + 1; + *end_ret = -1; + goto out; + } else if (!node) { node = next; - if (!node) { - /* - * We are past the last allocated chunk, - * set start at the end of the last extent. The - * device alloc tree should never be empty so - * prev is always set. - */ - ASSERT(prev); - state = rb_entry(prev, struct extent_state, rb_node); - *start_ret = state->end + 1; - *end_ret = -1; - goto out; - } } /* * At this point 'node' either contains 'start' or start is @@ -3938,6 +3942,11 @@ int btree_write_cache_pages(struct address_space *mapping, if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ end = -1; + /* + * Start from the beginning does not need to cycle over the + * range, mark it as scanned. + */ + scanned = (index == 0); } else { index = wbc->range_start >> PAGE_SHIFT; end = wbc->range_end >> PAGE_SHIFT; @@ -3955,7 +3964,6 @@ retry: tag))) { unsigned i; - scanned = 1; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; @@ -4084,6 +4092,11 @@ static int extent_write_cache_pages(struct address_space *mapping, if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ end = -1; + /* + * Start from the beginning does not need to cycle over the + * range, mark it as scanned. + */ + scanned = (index == 0); } else { index = wbc->range_start >> PAGE_SHIFT; end = wbc->range_end >> PAGE_SHIFT; @@ -4117,7 +4130,6 @@ retry: &index, end, tag))) { unsigned i; - scanned = 1; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; @@ -4177,7 +4189,16 @@ retry: */ scanned = 1; index = 0; - goto retry; + + /* + * If we're looping we could run into a page that is locked by a + * writer and that writer could be waiting on writeback for a + * page in our current bio, and thus deadlock, so flush the + * write bio here. + */ + ret = flush_write_bio(epd); + if (!ret) + goto retry; } if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole)) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8e86b2d700c4..d88b8d8897cc 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3244,6 +3244,7 @@ static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len, struct inode *dst, u64 dst_loff) { + const u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; int ret; /* @@ -3251,7 +3252,7 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len, * source range to serialize with relocation. */ btrfs_double_extent_lock(src, loff, dst, dst_loff, len); - ret = btrfs_clone(src, dst, loff, len, len, dst_loff, 1); + ret = btrfs_clone(src, dst, loff, len, ALIGN(len, bs), dst_loff, 1); btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); return ret; diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 99fe9bf3fdac..98f9684e7ffc 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -121,7 +121,6 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize) spin_lock_init(&fs_info->qgroup_lock); spin_lock_init(&fs_info->super_lock); spin_lock_init(&fs_info->fs_roots_radix_lock); - spin_lock_init(&fs_info->tree_mod_seq_lock); mutex_init(&fs_info->qgroup_ioctl_lock); mutex_init(&fs_info->qgroup_rescan_lock); rwlock_init(&fs_info->tree_mod_log_lock); diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 123d9a614357..df7ce874a74b 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -441,8 +441,17 @@ static int test_find_first_clear_extent_bit(void) int ret = -EINVAL; test_msg("running find_first_clear_extent_bit test"); + extent_io_tree_init(NULL, &tree, IO_TREE_SELFTEST, NULL); + /* Test correct handling of empty tree */ + find_first_clear_extent_bit(&tree, 0, &start, &end, CHUNK_TRIMMED); + if (start != 0 || end != -1) { + test_err( + "error getting a range from completely empty tree: start %llu end %llu", + start, end); + goto out; + } /* * Set 1M-4M alloc/discard and 32M-64M thus leaving a hole between * 4M-32M diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8624bdee8c5b..ceffec752234 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -77,13 +77,14 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) } } -static noinline void switch_commit_roots(struct btrfs_transaction *trans) +static noinline void switch_commit_roots(struct btrfs_trans_handle *trans) { + struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root, *tmp; down_write(&fs_info->commit_root_sem); - list_for_each_entry_safe(root, tmp, &trans->switch_commits, + list_for_each_entry_safe(root, tmp, &cur_trans->switch_commits, dirty_list) { list_del_init(&root->dirty_list); free_extent_buffer(root->commit_root); @@ -95,16 +96,17 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans) } /* We can free old roots now. */ - spin_lock(&trans->dropped_roots_lock); - while (!list_empty(&trans->dropped_roots)) { - root = list_first_entry(&trans->dropped_roots, + spin_lock(&cur_trans->dropped_roots_lock); + while (!list_empty(&cur_trans->dropped_roots)) { + root = list_first_entry(&cur_trans->dropped_roots, struct btrfs_root, root_list); list_del_init(&root->root_list); - spin_unlock(&trans->dropped_roots_lock); + spin_unlock(&cur_trans->dropped_roots_lock); + btrfs_free_log(trans, root); btrfs_drop_and_free_fs_root(fs_info, root); - spin_lock(&trans->dropped_roots_lock); + spin_lock(&cur_trans->dropped_roots_lock); } - spin_unlock(&trans->dropped_roots_lock); + spin_unlock(&cur_trans->dropped_roots_lock); up_write(&fs_info->commit_root_sem); } @@ -1359,7 +1361,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, ret = commit_cowonly_roots(trans); if (ret) goto out; - switch_commit_roots(trans->transaction); + switch_commit_roots(trans); ret = btrfs_write_and_wait_transaction(trans); if (ret) btrfs_handle_fs_error(fs_info, ret, @@ -1949,6 +1951,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) struct btrfs_transaction *prev_trans = NULL; int ret; + /* + * Some places just start a transaction to commit it. We need to make + * sure that if this commit fails that the abort code actually marks the + * transaction as failed, so set trans->dirty to make the abort code do + * the right thing. + */ + trans->dirty = true; + /* Stop the commit early if ->aborted is set */ if (unlikely(READ_ONCE(cur_trans->aborted))) { ret = cur_trans->aborted; @@ -2237,7 +2247,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) list_add_tail(&fs_info->chunk_root->dirty_list, &cur_trans->switch_commits); - switch_commit_roots(cur_trans); + switch_commit_roots(trans); ASSERT(list_empty(&cur_trans->dirty_bgs)); ASSERT(list_empty(&cur_trans->io_bgs)); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ab27e6cd9b3e..6f2178618c22 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3953,7 +3953,7 @@ static int log_csums(struct btrfs_trans_handle *trans, static noinline int copy_items(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_path *dst_path, - struct btrfs_path *src_path, u64 *last_extent, + struct btrfs_path *src_path, int start_slot, int nr, int inode_only, u64 logged_isize) { @@ -3964,7 +3964,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item *extent; struct btrfs_inode_item *inode_item; struct extent_buffer *src = src_path->nodes[0]; - struct btrfs_key first_key, last_key, key; int ret; struct btrfs_key *ins_keys; u32 *ins_sizes; @@ -3972,9 +3971,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, int i; struct list_head ordered_sums; int skip_csum = inode->flags & BTRFS_INODE_NODATASUM; - bool has_extents = false; - bool need_find_last_extent = true; - bool done = false; INIT_LIST_HEAD(&ordered_sums); @@ -3983,8 +3979,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, if (!ins_data) return -ENOMEM; - first_key.objectid = (u64)-1; - ins_sizes = (u32 *)ins_data; ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); @@ -4005,9 +3999,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, src_offset = btrfs_item_ptr_offset(src, start_slot + i); - if (i == nr - 1) - last_key = ins_keys[i]; - if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { inode_item = btrfs_item_ptr(dst_path->nodes[0], dst_path->slots[0], @@ -4021,20 +4012,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, src_offset, ins_sizes[i]); } - /* - * We set need_find_last_extent here in case we know we were - * processing other items and then walk into the first extent in - * the inode. If we don't hit an extent then nothing changes, - * we'll do the last search the next time around. - */ - if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) { - has_extents = true; - if (first_key.objectid == (u64)-1) - first_key = ins_keys[i]; - } else { - need_find_last_extent = false; - } - /* take a reference on file data extents so that truncates * or deletes of this inode don't have to relog the inode * again @@ -4100,167 +4077,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, kfree(sums); } - if (!has_extents) - return ret; - - if (need_find_last_extent && *last_extent == first_key.offset) { - /* - * We don't have any leafs between our current one and the one - * we processed before that can have file extent items for our - * inode (and have a generation number smaller than our current - * transaction id). - */ - need_find_last_extent = false; - } - - /* - * Because we use btrfs_search_forward we could skip leaves that were - * not modified and then assume *last_extent is valid when it really - * isn't. So back up to the previous leaf and read the end of the last - * extent before we go and fill in holes. - */ - if (need_find_last_extent) { - u64 len; - - ret = btrfs_prev_leaf(inode->root, src_path); - if (ret < 0) - return ret; - if (ret) - goto fill_holes; - if (src_path->slots[0]) - src_path->slots[0]--; - src = src_path->nodes[0]; - btrfs_item_key_to_cpu(src, &key, src_path->slots[0]); - if (key.objectid != btrfs_ino(inode) || - key.type != BTRFS_EXTENT_DATA_KEY) - goto fill_holes; - extent = btrfs_item_ptr(src, src_path->slots[0], - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(src, extent) == - BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_ram_bytes(src, extent); - *last_extent = ALIGN(key.offset + len, - fs_info->sectorsize); - } else { - len = btrfs_file_extent_num_bytes(src, extent); - *last_extent = key.offset + len; - } - } -fill_holes: - /* So we did prev_leaf, now we need to move to the next leaf, but a few - * things could have happened - * - * 1) A merge could have happened, so we could currently be on a leaf - * that holds what we were copying in the first place. - * 2) A split could have happened, and now not all of the items we want - * are on the same leaf. - * - * So we need to adjust how we search for holes, we need to drop the - * path and re-search for the first extent key we found, and then walk - * forward until we hit the last one we copied. - */ - if (need_find_last_extent) { - /* btrfs_prev_leaf could return 1 without releasing the path */ - btrfs_release_path(src_path); - ret = btrfs_search_slot(NULL, inode->root, &first_key, - src_path, 0, 0); - if (ret < 0) - return ret; - ASSERT(ret == 0); - src = src_path->nodes[0]; - i = src_path->slots[0]; - } else { - i = start_slot; - } - - /* - * Ok so here we need to go through and fill in any holes we may have - * to make sure that holes are punched for those areas in case they had - * extents previously. - */ - while (!done) { - u64 offset, len; - u64 extent_end; - - if (i >= btrfs_header_nritems(src_path->nodes[0])) { - ret = btrfs_next_leaf(inode->root, src_path); - if (ret < 0) - return ret; - ASSERT(ret == 0); - src = src_path->nodes[0]; - i = 0; - need_find_last_extent = true; - } - - btrfs_item_key_to_cpu(src, &key, i); - if (!btrfs_comp_cpu_keys(&key, &last_key)) - done = true; - if (key.objectid != btrfs_ino(inode) || - key.type != BTRFS_EXTENT_DATA_KEY) { - i++; - continue; - } - extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(src, extent) == - BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_ram_bytes(src, extent); - extent_end = ALIGN(key.offset + len, - fs_info->sectorsize); - } else { - len = btrfs_file_extent_num_bytes(src, extent); - extent_end = key.offset + len; - } - i++; - - if (*last_extent == key.offset) { - *last_extent = extent_end; - continue; - } - offset = *last_extent; - len = key.offset - *last_extent; - ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode), - offset, 0, 0, len, 0, len, 0, 0, 0); - if (ret) - break; - *last_extent = extent_end; - } - - /* - * Check if there is a hole between the last extent found in our leaf - * and the first extent in the next leaf. If there is one, we need to - * log an explicit hole so that at replay time we can punch the hole. - */ - if (ret == 0 && - key.objectid == btrfs_ino(inode) && - key.type == BTRFS_EXTENT_DATA_KEY && - i == btrfs_header_nritems(src_path->nodes[0])) { - ret = btrfs_next_leaf(inode->root, src_path); - need_find_last_extent = true; - if (ret > 0) { - ret = 0; - } else if (ret == 0) { - btrfs_item_key_to_cpu(src_path->nodes[0], &key, - src_path->slots[0]); - if (key.objectid == btrfs_ino(inode) && - key.type == BTRFS_EXTENT_DATA_KEY && - *last_extent < key.offset) { - const u64 len = key.offset - *last_extent; - - ret = btrfs_insert_file_extent(trans, log, - btrfs_ino(inode), - *last_extent, 0, - 0, len, 0, len, - 0, 0, 0); - *last_extent += len; - } - } - } - /* - * Need to let the callers know we dropped the path so they should - * re-search. - */ - if (!ret && need_find_last_extent) - ret = 1; return ret; } @@ -4425,7 +4241,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, const u64 i_size = i_size_read(&inode->vfs_inode); const u64 ino = btrfs_ino(inode); struct btrfs_path *dst_path = NULL; - u64 last_extent = (u64)-1; + bool dropped_extents = false; int ins_nr = 0; int start_slot; int ret; @@ -4447,8 +4263,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, if (slot >= btrfs_header_nritems(leaf)) { if (ins_nr > 0) { ret = copy_items(trans, inode, dst_path, path, - &last_extent, start_slot, - ins_nr, 1, 0); + start_slot, ins_nr, 1, 0); if (ret < 0) goto out; ins_nr = 0; @@ -4472,8 +4287,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, path->slots[0]++; continue; } - if (last_extent == (u64)-1) { - last_extent = key.offset; + if (!dropped_extents) { /* * Avoid logging extent items logged in past fsync calls * and leading to duplicate keys in the log tree. @@ -4487,6 +4301,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, } while (ret == -EAGAIN); if (ret) goto out; + dropped_extents = true; } if (ins_nr == 0) start_slot = slot; @@ -4501,7 +4316,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, } } if (ins_nr > 0) { - ret = copy_items(trans, inode, dst_path, path, &last_extent, + ret = copy_items(trans, inode, dst_path, path, start_slot, ins_nr, 1, 0); if (ret > 0) ret = 0; @@ -4688,13 +4503,8 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, if (slot >= nritems) { if (ins_nr > 0) { - u64 last_extent = 0; - ret = copy_items(trans, inode, dst_path, path, - &last_extent, start_slot, - ins_nr, 1, 0); - /* can't be 1, extent items aren't processed */ - ASSERT(ret <= 0); + start_slot, ins_nr, 1, 0); if (ret < 0) return ret; ins_nr = 0; @@ -4718,13 +4528,8 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, cond_resched(); } if (ins_nr > 0) { - u64 last_extent = 0; - ret = copy_items(trans, inode, dst_path, path, - &last_extent, start_slot, - ins_nr, 1, 0); - /* can't be 1, extent items aren't processed */ - ASSERT(ret <= 0); + start_slot, ins_nr, 1, 0); if (ret < 0) return ret; } @@ -4733,100 +4538,119 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, } /* - * If the no holes feature is enabled we need to make sure any hole between the - * last extent and the i_size of our inode is explicitly marked in the log. This - * is to make sure that doing something like: - * - * 1) create file with 128Kb of data - * 2) truncate file to 64Kb - * 3) truncate file to 256Kb - * 4) fsync file - * 5) - * 6) mount fs and trigger log replay - * - * Will give us a file with a size of 256Kb, the first 64Kb of data match what - * the file had in its first 64Kb of data at step 1 and the last 192Kb of the - * file correspond to a hole. The presence of explicit holes in a log tree is - * what guarantees that log replay will remove/adjust file extent items in the - * fs/subvol tree. - * - * Here we do not need to care about holes between extents, that is already done - * by copy_items(). We also only need to do this in the full sync path, where we - * lookup for extents from the fs/subvol tree only. In the fast path case, we - * lookup the list of modified extent maps and if any represents a hole, we - * insert a corresponding extent representing a hole in the log tree. + * When using the NO_HOLES feature if we punched a hole that causes the + * deletion of entire leafs or all the extent items of the first leaf (the one + * that contains the inode item and references) we may end up not processing + * any extents, because there are no leafs with a generation matching the + * current transaction that have extent items for our inode. So we need to find + * if any holes exist and then log them. We also need to log holes after any + * truncate operation that changes the inode's size. */ -static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_inode *inode, - struct btrfs_path *path) +static int btrfs_log_holes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_inode *inode, + struct btrfs_path *path) { struct btrfs_fs_info *fs_info = root->fs_info; - int ret; struct btrfs_key key; - u64 hole_start; - u64 hole_size; - struct extent_buffer *leaf; - struct btrfs_root *log = root->log_root; const u64 ino = btrfs_ino(inode); const u64 i_size = i_size_read(&inode->vfs_inode); + u64 prev_extent_end = 0; + int ret; - if (!btrfs_fs_incompat(fs_info, NO_HOLES)) + if (!btrfs_fs_incompat(fs_info, NO_HOLES) || i_size == 0) return 0; key.objectid = ino; key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = (u64)-1; + key.offset = 0; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - ASSERT(ret != 0); if (ret < 0) return ret; - ASSERT(path->slots[0] > 0); - path->slots[0]--; - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - - if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) { - /* inode does not have any extents */ - hole_start = 0; - hole_size = i_size; - } else { + while (true) { struct btrfs_file_extent_item *extent; + struct extent_buffer *leaf = path->nodes[0]; u64 len; - /* - * If there's an extent beyond i_size, an explicit hole was - * already inserted by copy_items(). - */ - if (key.offset >= i_size) - return 0; + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ret; + if (ret > 0) { + ret = 0; + break; + } + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) + break; + + /* We have a hole, log it. */ + if (prev_extent_end < key.offset) { + const u64 hole_len = key.offset - prev_extent_end; + + /* + * Release the path to avoid deadlocks with other code + * paths that search the root while holding locks on + * leafs from the log root. + */ + btrfs_release_path(path); + ret = btrfs_insert_file_extent(trans, root->log_root, + ino, prev_extent_end, 0, + 0, hole_len, 0, hole_len, + 0, 0, 0); + if (ret < 0) + return ret; + + /* + * Search for the same key again in the root. Since it's + * an extent item and we are holding the inode lock, the + * key must still exist. If it doesn't just emit warning + * and return an error to fall back to a transaction + * commit. + */ + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ret; + if (WARN_ON(ret > 0)) + return -ENOENT; + leaf = path->nodes[0]; + } extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, extent) == - BTRFS_FILE_EXTENT_INLINE) - return 0; + BTRFS_FILE_EXTENT_INLINE) { + len = btrfs_file_extent_ram_bytes(leaf, extent); + prev_extent_end = ALIGN(key.offset + len, + fs_info->sectorsize); + } else { + len = btrfs_file_extent_num_bytes(leaf, extent); + prev_extent_end = key.offset + len; + } - len = btrfs_file_extent_num_bytes(leaf, extent); - /* Last extent goes beyond i_size, no need to log a hole. */ - if (key.offset + len > i_size) - return 0; - hole_start = key.offset + len; - hole_size = i_size - hole_start; + path->slots[0]++; + cond_resched(); } - btrfs_release_path(path); - /* Last extent ends at i_size. */ - if (hole_size == 0) - return 0; + if (prev_extent_end < i_size) { + u64 hole_len; - hole_size = ALIGN(hole_size, fs_info->sectorsize); - ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0, - hole_size, 0, hole_size, 0, 0, 0); - return ret; + btrfs_release_path(path); + hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize); + ret = btrfs_insert_file_extent(trans, root->log_root, + ino, prev_extent_end, 0, 0, + hole_len, 0, hole_len, + 0, 0, 0); + if (ret < 0) + return ret; + } + + return 0; } /* @@ -5030,6 +4854,50 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, } continue; } + /* + * If the inode was already logged skip it - otherwise we can + * hit an infinite loop. Example: + * + * From the commit root (previous transaction) we have the + * following inodes: + * + * inode 257 a directory + * inode 258 with references "zz" and "zz_link" on inode 257 + * inode 259 with reference "a" on inode 257 + * + * And in the current (uncommitted) transaction we have: + * + * inode 257 a directory, unchanged + * inode 258 with references "a" and "a2" on inode 257 + * inode 259 with reference "zz_link" on inode 257 + * inode 261 with reference "zz" on inode 257 + * + * When logging inode 261 the following infinite loop could + * happen if we don't skip already logged inodes: + * + * - we detect inode 258 as a conflicting inode, with inode 261 + * on reference "zz", and log it; + * + * - we detect inode 259 as a conflicting inode, with inode 258 + * on reference "a", and log it; + * + * - we detect inode 258 as a conflicting inode, with inode 259 + * on reference "zz_link", and log it - again! After this we + * repeat the above steps forever. + */ + spin_lock(&BTRFS_I(inode)->lock); + /* + * Check the inode's logged_trans only instead of + * btrfs_inode_in_log(). This is because the last_log_commit of + * the inode is not updated when we only log that it exists and + * and it has the full sync bit set (see btrfs_log_inode()). + */ + if (BTRFS_I(inode)->logged_trans == trans->transid) { + spin_unlock(&BTRFS_I(inode)->lock); + btrfs_add_delayed_iput(inode); + continue; + } + spin_unlock(&BTRFS_I(inode)->lock); /* * We are safe logging the other inode without acquiring its * lock as long as we log with the LOG_INODE_EXISTS mode. We @@ -5129,7 +4997,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_key min_key; struct btrfs_key max_key; struct btrfs_root *log = root->log_root; - u64 last_extent = 0; int err = 0; int ret; int nritems; @@ -5307,7 +5174,7 @@ again: ins_start_slot = path->slots[0]; } ret = copy_items(trans, inode, dst_path, path, - &last_extent, ins_start_slot, + ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { @@ -5330,17 +5197,13 @@ again: if (ins_nr == 0) goto next_slot; ret = copy_items(trans, inode, dst_path, path, - &last_extent, ins_start_slot, + ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } ins_nr = 0; - if (ret) { - btrfs_release_path(path); - continue; - } goto next_slot; } @@ -5353,18 +5216,13 @@ again: goto next_slot; } - ret = copy_items(trans, inode, dst_path, path, &last_extent, + ret = copy_items(trans, inode, dst_path, path, ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } - if (ret) { - ins_nr = 0; - btrfs_release_path(path); - continue; - } ins_nr = 1; ins_start_slot = path->slots[0]; next_slot: @@ -5378,13 +5236,12 @@ next_slot: } if (ins_nr) { ret = copy_items(trans, inode, dst_path, path, - &last_extent, ins_start_slot, + ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } - ret = 0; ins_nr = 0; } btrfs_release_path(path); @@ -5399,14 +5256,13 @@ next_key: } } if (ins_nr) { - ret = copy_items(trans, inode, dst_path, path, &last_extent, + ret = copy_items(trans, inode, dst_path, path, ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } - ret = 0; ins_nr = 0; } @@ -5419,7 +5275,7 @@ next_key: if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { btrfs_release_path(path); btrfs_release_path(dst_path); - err = btrfs_log_trailing_hole(trans, root, inode, path); + err = btrfs_log_holes(trans, root, inode, path); if (err) goto out_unlock; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 97f1ba7c18b2..f7d9fc1a6fc2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -881,17 +881,28 @@ static struct btrfs_fs_devices *find_fsid_changed( /* * Handles the case where scanned device is part of an fs that had * multiple successful changes of FSID but curently device didn't - * observe it. Meaning our fsid will be different than theirs. + * observe it. Meaning our fsid will be different than theirs. We need + * to handle two subcases : + * 1 - The fs still continues to have different METADATA/FSID uuids. + * 2 - The fs is switched back to its original FSID (METADATA/FSID + * are equal). */ list_for_each_entry(fs_devices, &fs_uuids, fs_list) { + /* Changed UUIDs */ if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid, BTRFS_FSID_SIZE) != 0 && memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid, BTRFS_FSID_SIZE) == 0 && memcmp(fs_devices->fsid, disk_super->fsid, - BTRFS_FSID_SIZE) != 0) { + BTRFS_FSID_SIZE) != 0) + return fs_devices; + + /* Unchanged UUIDs */ + if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid, + BTRFS_FSID_SIZE) == 0 && + memcmp(fs_devices->fsid, disk_super->metadata_uuid, + BTRFS_FSID_SIZE) == 0) return fs_devices; - } } return NULL; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index e1cac715d19e..06d932ed097e 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -350,9 +350,14 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) } rc = cifs_negotiate_protocol(0, tcon->ses); - if (!rc && tcon->ses->need_reconnect) + if (!rc && tcon->ses->need_reconnect) { rc = cifs_setup_session(0, tcon->ses, nls_codepage); - + if ((rc == -EACCES) && !tcon->retry) { + rc = -EHOSTDOWN; + mutex_unlock(&tcon->ses->session_mutex); + goto failed; + } + } if (rc || !tcon->need_reconnect) { mutex_unlock(&tcon->ses->session_mutex); goto out; @@ -397,6 +402,7 @@ out: case SMB2_SET_INFO: rc = -EAGAIN; } +failed: unload_nls(nls_codepage); return rc; } diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 680aba9c00d5..fd0b5dd68f9e 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -76,14 +76,11 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr) if (ia_valid & ATTR_GID) sd_iattr->ia_gid = iattr->ia_gid; if (ia_valid & ATTR_ATIME) - sd_iattr->ia_atime = timestamp_truncate(iattr->ia_atime, - inode); + sd_iattr->ia_atime = iattr->ia_atime; if (ia_valid & ATTR_MTIME) - sd_iattr->ia_mtime = timestamp_truncate(iattr->ia_mtime, - inode); + sd_iattr->ia_mtime = iattr->ia_mtime; if (ia_valid & ATTR_CTIME) - sd_iattr->ia_ctime = timestamp_truncate(iattr->ia_ctime, - inode); + sd_iattr->ia_ctime = iattr->ia_ctime; if (ia_valid & ATTR_MODE) { umode_t mode = iattr->ia_mode; diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index c34fa7c61b43..4ee65b2b6247 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -664,9 +664,6 @@ static int check_for_busy_inodes(struct super_block *sb, struct list_head *pos; size_t busy_count = 0; unsigned long ino; - struct dentry *dentry; - char _path[256]; - char *path = NULL; spin_lock(&mk->mk_decrypted_inodes_lock); @@ -685,22 +682,14 @@ static int check_for_busy_inodes(struct super_block *sb, struct fscrypt_info, ci_master_key_link)->ci_inode; ino = inode->i_ino; - dentry = d_find_alias(inode); } spin_unlock(&mk->mk_decrypted_inodes_lock); - if (dentry) { - path = dentry_path(dentry, _path, sizeof(_path)); - dput(dentry); - } - if (IS_ERR_OR_NULL(path)) - path = "(unknown)"; - fscrypt_warn(NULL, - "%s: %zu inode(s) still busy after removing key with %s %*phN, including ino %lu (%s)", + "%s: %zu inode(s) still busy after removing key with %s %*phN, including ino %lu", sb->s_id, busy_count, master_key_spec_type(&mk->mk_spec), master_key_spec_len(&mk->mk_spec), (u8 *)&mk->mk_spec.u, - ino, path); + ino); return -EBUSY; } diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 19f89f9fb10c..23b74b8e8f96 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -306,24 +306,22 @@ static int z_erofs_shifted_transform(const struct z_erofs_decompress_req *rq, } src = kmap_atomic(*rq->in); - if (!rq->out[0]) { - dst = NULL; - } else { + if (rq->out[0]) { dst = kmap_atomic(rq->out[0]); memcpy(dst + rq->pageofs_out, src, righthalf); + kunmap_atomic(dst); } - if (rq->out[1] == *rq->in) { - memmove(src, src + righthalf, rq->pageofs_out); - } else if (nrpages_out == 2) { - if (dst) - kunmap_atomic(dst); + if (nrpages_out == 2) { DBG_BUGON(!rq->out[1]); - dst = kmap_atomic(rq->out[1]); - memcpy(dst, src + righthalf, rq->pageofs_out); + if (rq->out[1] == *rq->in) { + memmove(src, src + righthalf, rq->pageofs_out); + } else { + dst = kmap_atomic(rq->out[1]); + memcpy(dst, src + righthalf, rq->pageofs_out); + kunmap_atomic(dst); + } } - if (dst) - kunmap_atomic(dst); kunmap_atomic(src); return 0; } diff --git a/fs/eventfd.c b/fs/eventfd.c index 8aa0ea8c55e8..78e41c7c3d05 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -24,6 +24,8 @@ #include #include +DEFINE_PER_CPU(int, eventfd_wake_count); + static DEFINE_IDA(eventfd_ida); struct eventfd_ctx { @@ -60,12 +62,25 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) { unsigned long flags; + /* + * Deadlock or stack overflow issues can happen if we recurse here + * through waitqueue wakeup handlers. If the caller users potentially + * nested waitqueues with custom wakeup handlers, then it should + * check eventfd_signal_count() before calling this function. If + * it returns true, the eventfd_signal() call should be deferred to a + * safe context. + */ + if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count))) + return 0; + spin_lock_irqsave(&ctx->wqh.lock, flags); + this_cpu_inc(eventfd_wake_count); if (ULLONG_MAX - ctx->count < n) n = ULLONG_MAX - ctx->count; ctx->count += n; if (waitqueue_active(&ctx->wqh)) wake_up_locked_poll(&ctx->wqh, EPOLLIN); + this_cpu_dec(eventfd_wake_count); spin_unlock_irqrestore(&ctx->wqh.lock, flags); return n; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 30c630d73f0f..065cd2d1bdc6 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1082,9 +1082,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) if (EXT2_BLOCKS_PER_GROUP(sb) == 0) goto cantfind_ext2; - sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - - le32_to_cpu(es->s_first_data_block) - 1) - / EXT2_BLOCKS_PER_GROUP(sb)) + 1; + sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - + le32_to_cpu(es->s_first_data_block) - 1) + / EXT2_BLOCKS_PER_GROUP(sb)) + 1; db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) / EXT2_DESC_PER_BLOCK(sb); sbi->s_group_desc = kmalloc_array (db_count, diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 6305d5ec25af..5ef8d7ae231b 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -673,9 +673,11 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { struct qstr qstr = {.name = str, .len = len }; - struct inode *inode = dentry->d_parent->d_inode; + const struct dentry *parent = READ_ONCE(dentry->d_parent); + const struct inode *inode = READ_ONCE(parent->d_inode); - if (!IS_CASEFOLDED(inode) || !EXT4_SB(inode->i_sb)->s_encoding) { + if (!inode || !IS_CASEFOLDED(inode) || + !EXT4_SB(inode->i_sb)->s_encoding) { if (len != name->len) return -1; return memcmp(str, name->name, len); @@ -688,10 +690,11 @@ static int ext4_d_hash(const struct dentry *dentry, struct qstr *str) { const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb); const struct unicode_map *um = sbi->s_encoding; + const struct inode *inode = READ_ONCE(dentry->d_inode); unsigned char *norm; int len, ret = 0; - if (!IS_CASEFOLDED(dentry->d_inode) || !um) + if (!inode || !IS_CASEFOLDED(inode) || !um) return 0; norm = kmalloc(PATH_MAX, GFP_ATOMIC); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 12ceadef32c5..2cc9f2168b9e 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -478,17 +478,26 @@ int ext4_bio_write_page(struct ext4_io_submit *io, gfp_t gfp_flags = GFP_NOFS; unsigned int enc_bytes = round_up(len, i_blocksize(inode)); + /* + * Since bounce page allocation uses a mempool, we can only use + * a waiting mask (i.e. request guaranteed allocation) on the + * first page of the bio. Otherwise it can deadlock. + */ + if (io->io_bio) + gfp_flags = GFP_NOWAIT | __GFP_NOWARN; retry_encrypt: bounce_page = fscrypt_encrypt_pagecache_blocks(page, enc_bytes, 0, gfp_flags); if (IS_ERR(bounce_page)) { ret = PTR_ERR(bounce_page); - if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { - if (io->io_bio) { + if (ret == -ENOMEM && + (io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) { + gfp_flags = GFP_NOFS; + if (io->io_bio) ext4_io_submit(io); - congestion_wait(BLK_RW_ASYNC, HZ/50); - } - gfp_flags |= __GFP_NOFAIL; + else + gfp_flags |= __GFP_NOFAIL; + congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry_encrypt; } bounce_page = NULL; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 4033778bcbbf..84280ad3786c 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -1068,24 +1068,27 @@ static int f2fs_d_compare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { struct qstr qstr = {.name = str, .len = len }; + const struct dentry *parent = READ_ONCE(dentry->d_parent); + const struct inode *inode = READ_ONCE(parent->d_inode); - if (!IS_CASEFOLDED(dentry->d_parent->d_inode)) { + if (!inode || !IS_CASEFOLDED(inode)) { if (len != name->len) return -1; - return memcmp(str, name, len); + return memcmp(str, name->name, len); } - return f2fs_ci_compare(dentry->d_parent->d_inode, name, &qstr, false); + return f2fs_ci_compare(inode, name, &qstr, false); } static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str) { struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); const struct unicode_map *um = sbi->s_encoding; + const struct inode *inode = READ_ONCE(dentry->d_inode); unsigned char *norm; int len, ret = 0; - if (!IS_CASEFOLDED(dentry->d_inode)) + if (!inode || !IS_CASEFOLDED(inode)) return 0; norm = f2fs_kmalloc(sbi, PATH_MAX, GFP_ATOMIC); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index fae665691481..72f308790a8e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -751,18 +751,12 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr) inode->i_uid = attr->ia_uid; if (ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; - if (ia_valid & ATTR_ATIME) { - inode->i_atime = timestamp_truncate(attr->ia_atime, - inode); - } - if (ia_valid & ATTR_MTIME) { - inode->i_mtime = timestamp_truncate(attr->ia_mtime, - inode); - } - if (ia_valid & ATTR_CTIME) { - inode->i_ctime = timestamp_truncate(attr->ia_ctime, - inode); - } + if (ia_valid & ATTR_ATIME) + inode->i_atime = attr->ia_atime; + if (ia_valid & ATTR_MTIME) + inode->i_mtime = attr->ia_mtime; + if (ia_valid & ATTR_CTIME) + inode->i_ctime = attr->ia_ctime; if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1443cee15863..ea8dbf1458c9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1213,9 +1213,11 @@ static int f2fs_statfs_project(struct super_block *sb, return PTR_ERR(dquot); spin_lock(&dquot->dq_dqb_lock); - limit = (dquot->dq_dqb.dqb_bsoftlimit ? - dquot->dq_dqb.dqb_bsoftlimit : - dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits; + limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit, + dquot->dq_dqb.dqb_bhardlimit); + if (limit) + limit >>= sb->s_blocksize_bits; + if (limit && buf->f_blocks > limit) { curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits; buf->f_blocks = limit; @@ -1224,9 +1226,9 @@ static int f2fs_statfs_project(struct super_block *sb, (buf->f_blocks - curblock) : 0; } - limit = dquot->dq_dqb.dqb_isoftlimit ? - dquot->dq_dqb.dqb_isoftlimit : - dquot->dq_dqb.dqb_ihardlimit; + limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit, + dquot->dq_dqb.dqb_ihardlimit); + if (limit && buf->f_files > limit) { buf->f_files = limit; buf->f_ffree = diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 335607b8c5c0..76ac9c7d32ec 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2063,7 +2063,7 @@ void wb_workfn(struct work_struct *work) struct bdi_writeback, dwork); long pages_written; - set_worker_desc("flush-%s", dev_name(wb->bdi->dev)); + set_worker_desc("flush-%s", bdi_dev_name(wb->bdi)); current->flags |= PF_SWAPWRITE; if (likely(!current_is_workqueue_rescuer() || diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ce715380143c..695369f46f92 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1465,6 +1465,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, } ia = NULL; if (nres < 0) { + iov_iter_revert(iter, nbytes); err = nres; break; } @@ -1473,8 +1474,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, count -= nres; res += nres; pos += nres; - if (nres != nbytes) + if (nres != nbytes) { + iov_iter_revert(iter, nbytes - nres); break; + } if (count) { max_pages = iov_iter_npages(iter, fc->max_pages); ia = fuse_io_alloc(io, max_pages); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 01ff37b76652..4a10b4e7092a 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -833,7 +833,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct gfs2_inode *ip = GFS2_I(inode); - ssize_t written = 0, ret; + ssize_t ret; ret = gfs2_rsqa_alloc(ip); if (ret) @@ -853,68 +853,58 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) inode_lock(inode); ret = generic_write_checks(iocb, from); if (ret <= 0) - goto out; - - /* We can write back this queue in page reclaim */ - current->backing_dev_info = inode_to_bdi(inode); + goto out_unlock; ret = file_remove_privs(file); if (ret) - goto out2; + goto out_unlock; ret = file_update_time(file); if (ret) - goto out2; + goto out_unlock; if (iocb->ki_flags & IOCB_DIRECT) { struct address_space *mapping = file->f_mapping; - loff_t pos, endbyte; - ssize_t buffered; + ssize_t buffered, ret2; - written = gfs2_file_direct_write(iocb, from); - if (written < 0 || !iov_iter_count(from)) - goto out2; + ret = gfs2_file_direct_write(iocb, from); + if (ret < 0 || !iov_iter_count(from)) + goto out_unlock; - ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); - if (unlikely(ret < 0)) - goto out2; - buffered = ret; + iocb->ki_flags |= IOCB_DSYNC; + current->backing_dev_info = inode_to_bdi(inode); + buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + current->backing_dev_info = NULL; + if (unlikely(buffered <= 0)) + goto out_unlock; /* * We need to ensure that the page cache pages are written to * disk and invalidated to preserve the expected O_DIRECT - * semantics. + * semantics. If the writeback or invalidate fails, only report + * the direct I/O range as we don't know if the buffered pages + * made it to disk. */ - pos = iocb->ki_pos; - endbyte = pos + buffered - 1; - ret = filemap_write_and_wait_range(mapping, pos, endbyte); - if (!ret) { - iocb->ki_pos += buffered; - written += buffered; - invalidate_mapping_pages(mapping, - pos >> PAGE_SHIFT, - endbyte >> PAGE_SHIFT); - } else { - /* - * We don't know how much we wrote, so just return - * the number of bytes which were direct-written - */ - } + iocb->ki_pos += buffered; + ret2 = generic_write_sync(iocb, buffered); + invalidate_mapping_pages(mapping, + (iocb->ki_pos - buffered) >> PAGE_SHIFT, + (iocb->ki_pos - 1) >> PAGE_SHIFT); + if (!ret || ret2 > 0) + ret += ret2; } else { + current->backing_dev_info = inode_to_bdi(inode); ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); - if (likely(ret > 0)) + current->backing_dev_info = NULL; + if (likely(ret > 0)) { iocb->ki_pos += ret; + ret = generic_write_sync(iocb, ret); + } } -out2: - current->backing_dev_info = NULL; -out: +out_unlock: inode_unlock(inode); - if (likely(ret > 0)) { - /* Handle various SYNC-type writes */ - ret = generic_write_sync(iocb, ret); - } - return written ? written : ret; + return ret; } static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index e7b9d39955d4..7ca84be20cf6 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -421,7 +421,7 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd, for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) { if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) { - if (lh.lh_sequence > head->lh_sequence) + if (lh.lh_sequence >= head->lh_sequence) *head = lh; else { ret = true; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 1c58859aa592..ef485f892d1b 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -981,6 +981,7 @@ static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos) static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos) { + (*pos)++; return NULL; } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e180033e35cf..05ed7be8a634 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -162,6 +162,17 @@ typedef struct { bool eof; } nfs_readdir_descriptor_t; +static +void nfs_readdir_init_array(struct page *page) +{ + struct nfs_cache_array *array; + + array = kmap_atomic(page); + memset(array, 0, sizeof(struct nfs_cache_array)); + array->eof_index = -1; + kunmap_atomic(array); +} + /* * we are freeing strings created by nfs_add_to_readdir_array() */ @@ -174,6 +185,7 @@ void nfs_readdir_clear_array(struct page *page) array = kmap_atomic(page); for (i = 0; i < array->size; i++) kfree(array->array[i].string.name); + array->size = 0; kunmap_atomic(array); } @@ -610,6 +622,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, int status = -ENOMEM; unsigned int array_size = ARRAY_SIZE(pages); + nfs_readdir_init_array(page); + entry.prev_cookie = 0; entry.cookie = desc->last_cookie; entry.eof = 0; @@ -626,8 +640,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, } array = kmap(page); - memset(array, 0, sizeof(struct nfs_cache_array)); - array->eof_index = -1; status = nfs_readdir_alloc_pages(pages, array_size); if (status < 0) @@ -682,6 +694,7 @@ int nfs_readdir_filler(void *data, struct page* page) unlock_page(page); return 0; error: + nfs_readdir_clear_array(page); unlock_page(page); return ret; } @@ -689,8 +702,6 @@ int nfs_readdir_filler(void *data, struct page* page) static void cache_page_release(nfs_readdir_descriptor_t *desc) { - if (!desc->page->mapping) - nfs_readdir_clear_array(desc->page); put_page(desc->page); desc->page = NULL; } @@ -704,19 +715,28 @@ struct page *get_cache_page(nfs_readdir_descriptor_t *desc) /* * Returns 0 if desc->dir_cookie was found on page desc->page_index + * and locks the page to prevent removal from the page cache. */ static -int find_cache_page(nfs_readdir_descriptor_t *desc) +int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc) { int res; desc->page = get_cache_page(desc); if (IS_ERR(desc->page)) return PTR_ERR(desc->page); - - res = nfs_readdir_search_array(desc); + res = lock_page_killable(desc->page); if (res != 0) - cache_page_release(desc); + goto error; + res = -EAGAIN; + if (desc->page->mapping != NULL) { + res = nfs_readdir_search_array(desc); + if (res == 0) + return 0; + } + unlock_page(desc->page); +error: + cache_page_release(desc); return res; } @@ -731,7 +751,7 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) desc->last_cookie = 0; } do { - res = find_cache_page(desc); + res = find_and_lock_cache_page(desc); } while (res == -EAGAIN); return res; } @@ -770,7 +790,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) desc->eof = true; kunmap(desc->page); - cache_page_release(desc); dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); return res; @@ -816,13 +835,13 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) status = nfs_do_filldir(desc); + out_release: + nfs_readdir_clear_array(desc->page); + cache_page_release(desc); out: dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); return status; - out_release: - cache_page_release(desc); - goto out; } /* The file offset position represents the dirent entry number. A @@ -887,6 +906,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) break; res = nfs_do_filldir(desc); + unlock_page(desc->page); + cache_page_release(desc); if (res < 0) break; } while (!desc->eof); diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index ef55e9b1cd4e..3007b8945d38 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -791,6 +791,7 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, *new; struct inode *inode; unsigned int hashval; + bool retry = true; /* FIXME: skip this if fh_dentry is already set? */ status = fh_verify(rqstp, fhp, S_IFREG, @@ -826,6 +827,11 @@ wait_for_construction: /* Did construction of this file fail? */ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + if (!retry) { + status = nfserr_jukebox; + goto out; + } + retry = false; nfsd_file_put_noref(nf); goto retry; } diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 2681c70283ce..e12409eca7cc 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -675,7 +675,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) /* Client gets 2 lease periods to return it */ cutoff = ktime_add_ns(task->tk_start, - nn->nfsd4_lease * NSEC_PER_SEC * 2); + (u64)nn->nfsd4_lease * NSEC_PER_SEC * 2); if (ktime_before(now, cutoff)) { rpc_delay(task, HZ/100); /* 10 mili-seconds */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 08f6eb2b73f8..1c82d7dd54df 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -6550,7 +6550,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } if (fl_flags & FL_SLEEP) { - nbl->nbl_time = jiffies; + nbl->nbl_time = get_seconds(); spin_lock(&nn->blocked_locks_lock); list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 46f56afb6cb8..a080789b4d13 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -605,7 +605,7 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b) struct nfsd4_blocked_lock { struct list_head nbl_list; struct list_head nbl_lru; - unsigned long nbl_time; + time_t nbl_time; struct file_lock nbl_lock; struct knfsd_fh nbl_fh; struct nfsd4_callback nbl_cb; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index cf423fea0c6f..fc38b9fe4549 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -975,6 +975,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, host_err = vfs_iter_write(file, &iter, &pos, flags); if (host_err < 0) goto out_nfserr; + *cnt = host_err; nfsdstats.io_write += *cnt; fsnotify_modify(file); diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 6c7388430ad3..d4359a1df3d5 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2899,18 +2899,12 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr) ia_valid |= ATTR_MTIME | ATTR_CTIME; } } - if (ia_valid & ATTR_ATIME) { - vi->i_atime = timestamp_truncate(attr->ia_atime, - vi); - } - if (ia_valid & ATTR_MTIME) { - vi->i_mtime = timestamp_truncate(attr->ia_mtime, - vi); - } - if (ia_valid & ATTR_CTIME) { - vi->i_ctime = timestamp_truncate(attr->ia_ctime, - vi); - } + if (ia_valid & ATTR_ATIME) + vi->i_atime = attr->ia_atime; + if (ia_valid & ATTR_MTIME) + vi->i_mtime = attr->ia_mtime; + if (ia_valid & ATTR_CTIME) + vi->i_ctime = attr->ia_ctime; mark_inode_dirty(vi); out: return err; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 9876db52913a..6cd5e4924e4d 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2101,17 +2101,15 @@ static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos) static int ocfs2_inode_lock_for_extent_tree(struct inode *inode, struct buffer_head **di_bh, int meta_level, - int overwrite_io, int write_sem, int wait) { int ret = 0; if (wait) - ret = ocfs2_inode_lock(inode, NULL, meta_level); + ret = ocfs2_inode_lock(inode, di_bh, meta_level); else - ret = ocfs2_try_inode_lock(inode, - overwrite_io ? NULL : di_bh, meta_level); + ret = ocfs2_try_inode_lock(inode, di_bh, meta_level); if (ret < 0) goto out; @@ -2136,6 +2134,7 @@ static int ocfs2_inode_lock_for_extent_tree(struct inode *inode, out_unlock: brelse(*di_bh); + *di_bh = NULL; ocfs2_inode_unlock(inode, meta_level); out: return ret; @@ -2177,7 +2176,6 @@ static int ocfs2_prepare_inode_for_write(struct file *file, ret = ocfs2_inode_lock_for_extent_tree(inode, &di_bh, meta_level, - overwrite_io, write_sem, wait); if (ret < 0) { @@ -2233,13 +2231,13 @@ static int ocfs2_prepare_inode_for_write(struct file *file, &di_bh, meta_level, write_sem); + meta_level = 1; + write_sem = 1; ret = ocfs2_inode_lock_for_extent_tree(inode, &di_bh, meta_level, - overwrite_io, - 1, + write_sem, wait); - write_sem = 1; if (ret < 0) { if (ret != -EAGAIN) mlog_errno(ret); diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index e235a635d9ec..15e4fa288475 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -146,7 +146,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) struct inode *inode = file_inode(file); struct fd real; const struct cred *old_cred; - ssize_t ret; + loff_t ret; /* * The two special cases below do not need to involve real fs, diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 47a91c9733a5..7255e6a5838f 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -504,7 +504,13 @@ get: if (err) goto fail; - WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev); + /* + * Directory inode is always on overlay st_dev. + * Non-dir with ovl_same_dev() could be on pseudo st_dev in case + * of xino bits overflow. + */ + WARN_ON_ONCE(S_ISDIR(stat.mode) && + dir->d_sb->s_dev != stat.dev); ino = stat.ino; } else if (xinobits && !OVL_TYPE_UPPER(type)) { ino = ovl_remap_lower_ino(ino, xinobits, diff --git a/fs/read_write.c b/fs/read_write.c index 5bbf587f5bc1..7458fccc59e1 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1777,10 +1777,9 @@ static int remap_verify_area(struct file *file, loff_t pos, loff_t len, * else. Assume that the offsets have already been checked for block * alignment. * - * For deduplication we always scale down to the previous block because we - * can't meaningfully compare post-EOF contents. - * - * For clone we only link a partial EOF block above the destination file's EOF. + * For clone we only link a partial EOF block above or at the destination file's + * EOF. For deduplication we accept a partial EOF block only if it ends at the + * destination file's EOF (can not link it into the middle of a file). * * Shorten the request if possible. */ @@ -1796,8 +1795,7 @@ static int generic_remap_check_len(struct inode *inode_in, if ((*len & blkmask) == 0) return 0; - if ((remap_flags & REMAP_FILE_DEDUP) || - pos_out + *len < i_size_read(inode_out)) + if (pos_out + *len < i_size_read(inode_out)) new_len &= ~blkmask; if (new_len == *len) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 0b98e3c8b461..6c0e19f7a21f 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -228,6 +228,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, if (nm.hash) { ubifs_assert(c, fname_len(&nm) == 0); ubifs_assert(c, fname_name(&nm) == NULL); + if (nm.hash & ~UBIFS_S_KEY_HASH_MASK) + goto done; /* ENOENT */ dent_key_init_hash(c, &key, dir->i_ino, nm.hash); err = ubifs_tnc_lookup_dh(c, &key, dent, nm.minor_hash); } else { diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index cd52585c8f4f..a771273fba7e 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -786,7 +786,9 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, if (page_offset > end_index) break; - page = find_or_create_page(mapping, page_offset, ra_gfp_mask); + page = pagecache_get_page(mapping, page_offset, + FGP_LOCK|FGP_ACCESSED|FGP_CREAT|FGP_NOWAIT, + ra_gfp_mask); if (!page) break; if (!PageUptodate(page)) @@ -1078,18 +1080,12 @@ static void do_attr_changes(struct inode *inode, const struct iattr *attr) inode->i_uid = attr->ia_uid; if (attr->ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; - if (attr->ia_valid & ATTR_ATIME) { - inode->i_atime = timestamp_truncate(attr->ia_atime, - inode); - } - if (attr->ia_valid & ATTR_MTIME) { - inode->i_mtime = timestamp_truncate(attr->ia_mtime, - inode); - } - if (attr->ia_valid & ATTR_CTIME) { - inode->i_ctime = timestamp_truncate(attr->ia_ctime, - inode); - } + if (attr->ia_valid & ATTR_ATIME) + inode->i_atime = attr->ia_atime; + if (attr->ia_valid & ATTR_MTIME) + inode->i_mtime = attr->ia_mtime; + if (attr->ia_valid & ATTR_CTIME) + inode->i_ctime = attr->ia_ctime; if (attr->ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 5dc5abca11c7..eeb1be259888 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -113,7 +113,8 @@ static int setflags(struct inode *inode, int flags) if (err) goto out_unlock; - ui->flags = ioctl2ubifs(flags); + ui->flags &= ~ioctl2ubifs(UBIFS_SUPPORTED_IOCTL_FLAGS); + ui->flags |= ioctl2ubifs(flags); ubifs_set_inode_flags(inode); inode->i_ctime = current_time(inode); release = ui->dirty; diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index a551eb3e9b89..6681c18e52b8 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -161,7 +161,7 @@ static int create_default_filesystem(struct ubifs_info *c) sup = kzalloc(ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size), GFP_KERNEL); mst = kzalloc(c->mst_node_alsz, GFP_KERNEL); idx_node_size = ubifs_idx_node_sz(c, 1); - idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL); + idx = kzalloc(ALIGN(idx_node_size, c->min_io_size), GFP_KERNEL); ino = kzalloc(ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size), GFP_KERNEL); cs = kzalloc(ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size), GFP_KERNEL); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 5e1e8ec0589e..7fc2f3f07c16 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1599,6 +1599,7 @@ out_free: vfree(c->ileb_buf); vfree(c->sbuf); kfree(c->bottom_up_buf); + kfree(c->sup_node); ubifs_debugging_exit(c); return err; } @@ -1641,6 +1642,7 @@ static void ubifs_umount(struct ubifs_info *c) vfree(c->ileb_buf); vfree(c->sbuf); kfree(c->bottom_up_buf); + kfree(c->sup_node); ubifs_debugging_exit(c); } diff --git a/fs/utimes.c b/fs/utimes.c index 1ba3f7883870..090739322463 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -36,14 +36,14 @@ static int utimes_common(const struct path *path, struct timespec64 *times) if (times[0].tv_nsec == UTIME_OMIT) newattrs.ia_valid &= ~ATTR_ATIME; else if (times[0].tv_nsec != UTIME_NOW) { - newattrs.ia_atime = timestamp_truncate(times[0], inode); + newattrs.ia_atime = times[0]; newattrs.ia_valid |= ATTR_ATIME_SET; } if (times[1].tv_nsec == UTIME_OMIT) newattrs.ia_valid &= ~ATTR_MTIME; else if (times[1].tv_nsec != UTIME_NOW) { - newattrs.ia_mtime = timestamp_truncate(times[1], inode); + newattrs.ia_mtime = times[1]; newattrs.ia_valid |= ATTR_MTIME_SET; } /* diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 04c0644006fd..c716ea81e653 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -137,13 +137,6 @@ * When used, an architecture is expected to provide __tlb_remove_table() * which does the actual freeing of these pages. * - * HAVE_RCU_TABLE_NO_INVALIDATE - * - * This makes HAVE_RCU_TABLE_FREE avoid calling tlb_flush_mmu_tlbonly() before - * freeing the page-table pages. This can be avoided if you use - * HAVE_RCU_TABLE_FREE and your architecture does _NOT_ use the Linux - * page-tables natively. - * * MMU_GATHER_NO_RANGE * * Use this if your architecture lacks an efficient flush_tlb_range(). @@ -189,8 +182,23 @@ struct mmu_table_batch { extern void tlb_remove_table(struct mmu_gather *tlb, void *table); +/* + * This allows an architecture that does not use the linux page-tables for + * hardware to skip the TLBI when freeing page tables. + */ +#ifndef tlb_needs_table_invalidate +#define tlb_needs_table_invalidate() (true) +#endif + +#else + +#ifdef tlb_needs_table_invalidate +#error tlb_needs_table_invalidate() requires HAVE_RCU_TABLE_FREE #endif +#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ + + #ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER /* * If we can't allocate a page to make a big batch of page pointers diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 97967ce06de3..f88197c1ffc2 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -504,4 +505,13 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) (1 << WB_async_congested)); } +extern const char *bdi_unknown_name; + +static inline const char *bdi_dev_name(struct backing_dev_info *bdi) +{ + if (!bdi || !bdi->dev) + return bdi_unknown_name; + return dev_name(bdi->dev); +} + #endif /* _LINUX_BACKING_DEV_H */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 31b1b0e03df8..018dce868de6 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -148,6 +148,20 @@ struct cpufreq_policy { struct notifier_block nb_max; }; +/* + * Used for passing new cpufreq policy data to the cpufreq driver's ->verify() + * callback for sanitization. That callback is only expected to modify the min + * and max values, if necessary, and specifically it must not update the + * frequency table. + */ +struct cpufreq_policy_data { + struct cpufreq_cpuinfo cpuinfo; + struct cpufreq_frequency_table *freq_table; + unsigned int cpu; + unsigned int min; /* in kHz */ + unsigned int max; /* in kHz */ +}; + struct cpufreq_freqs { struct cpufreq_policy *policy; unsigned int old; @@ -201,8 +215,6 @@ u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy); struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu); void cpufreq_cpu_release(struct cpufreq_policy *policy); int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); -int cpufreq_set_policy(struct cpufreq_policy *policy, - struct cpufreq_policy *new_policy); void refresh_frequency_limits(struct cpufreq_policy *policy); void cpufreq_update_policy(unsigned int cpu); void cpufreq_update_limits(unsigned int cpu); @@ -284,7 +296,7 @@ struct cpufreq_driver { /* needed by all drivers */ int (*init)(struct cpufreq_policy *policy); - int (*verify)(struct cpufreq_policy *policy); + int (*verify)(struct cpufreq_policy_data *policy); /* define one out of two */ int (*setpolicy)(struct cpufreq_policy *policy); @@ -415,8 +427,9 @@ static inline int cpufreq_thermal_control_enabled(struct cpufreq_driver *drv) (drv->flags & CPUFREQ_IS_COOLING_DEV); } -static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, - unsigned int min, unsigned int max) +static inline void cpufreq_verify_within_limits(struct cpufreq_policy_data *policy, + unsigned int min, + unsigned int max) { if (policy->min < min) policy->min = min; @@ -432,10 +445,10 @@ static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, } static inline void -cpufreq_verify_within_cpu_limits(struct cpufreq_policy *policy) +cpufreq_verify_within_cpu_limits(struct cpufreq_policy_data *policy) { cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, - policy->cpuinfo.max_freq); + policy->cpuinfo.max_freq); } #ifdef CONFIG_CPU_FREQ @@ -513,6 +526,7 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, * CPUFREQ GOVERNORS * *********************************************************************/ +#define CPUFREQ_POLICY_UNKNOWN (0) /* * If (cpufreq_driver->target) exists, the ->governor decides what frequency * within the limits is used. If (cpufreq_driver->setpolicy> exists, these @@ -684,9 +698,9 @@ static inline void dev_pm_opp_free_cpufreq_table(struct device *dev, int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table); -int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, +int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, struct cpufreq_frequency_table *table); -int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy); +int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, unsigned int target_freq, diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index ffcc7724ca21..dc4fd8a6644d 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -12,6 +12,8 @@ #include #include #include +#include +#include /* * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining @@ -40,6 +42,13 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt); +DECLARE_PER_CPU(int, eventfd_wake_count); + +static inline bool eventfd_signal_count(void) +{ + return this_cpu_read(eventfd_wake_count); +} + #else /* CONFIG_EVENTFD */ /* @@ -68,6 +77,11 @@ static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, return -ENOSYS; } +static inline bool eventfd_signal_count(void) +{ + return false; +} + #endif #endif /* _LINUX_EVENTFD_H */ diff --git a/include/linux/irq.h b/include/linux/irq.h index fb301cf29148..f8755e5fcd74 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -209,6 +209,8 @@ struct irq_data { * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set * IRQD_CAN_RESERVE - Can use reservation mode + * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change + * required */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -231,6 +233,7 @@ enum { IRQD_SINGLE_TARGET = (1 << 24), IRQD_DEFAULT_TRIGGER_SET = (1 << 25), IRQD_CAN_RESERVE = (1 << 26), + IRQD_MSI_NOMASK_QUIRK = (1 << 27), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -390,6 +393,21 @@ static inline bool irqd_can_reserve(struct irq_data *d) return __irqd_to_state(d) & IRQD_CAN_RESERVE; } +static inline void irqd_set_msi_nomask_quirk(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_MSI_NOMASK_QUIRK; +} + +static inline void irqd_clr_msi_nomask_quirk(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_MSI_NOMASK_QUIRK; +} + +static inline bool irqd_msi_nomask_quirk(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 583e7abd07f9..aba5ada373d6 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -205,6 +205,13 @@ enum { /* Irq domain implements MSI remapping */ IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5), + /* + * Quirk to handle MSI implementations which do not provide + * masking. Currently known to affect x86, but partially + * handled in core code. + */ + IRQ_DOMAIN_MSI_NOMASK_QUIRK = (1 << 6), + /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d41c521a39da..b81f0f1ded5f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -204,7 +204,7 @@ struct kvm_async_pf { struct list_head queue; struct kvm_vcpu *vcpu; struct mm_struct *mm; - gva_t gva; + gpa_t cr2_or_gpa; unsigned long addr; struct kvm_arch_async_pf arch; bool wakeup_all; @@ -212,8 +212,8 @@ struct kvm_async_pf { void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu); -int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, - struct kvm_arch_async_pf *arch); +int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + unsigned long hva, struct kvm_arch_async_pf *arch); int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif @@ -728,6 +728,7 @@ void kvm_set_pfn_dirty(kvm_pfn_t pfn); void kvm_set_pfn_accessed(kvm_pfn_t pfn); void kvm_get_pfn(kvm_pfn_t pfn); +void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache); int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int len); int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, @@ -750,7 +751,7 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); -unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); +unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); @@ -758,8 +759,12 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn); kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map); +int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map, + struct gfn_to_pfn_cache *cache, bool atomic); struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn); void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty); +int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, + struct gfn_to_pfn_cache *cache, bool dirty, bool atomic); unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable); int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset, diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index bde5374ae021..2382cb58969d 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -18,7 +18,7 @@ struct kvm_memslots; enum kvm_mr_change; -#include +#include /* * Address types: @@ -49,4 +49,11 @@ struct gfn_to_hva_cache { struct kvm_memory_slot *memslot; }; +struct gfn_to_pfn_cache { + u64 generation; + gfn_t gfn; + kvm_pfn_t pfn; + bool dirty; +}; + #endif /* __KVM_TYPES_H__ */ diff --git a/include/linux/mfd/rohm-bd70528.h b/include/linux/mfd/rohm-bd70528.h index 1013e60c5b25..b0109ee6dae2 100644 --- a/include/linux/mfd/rohm-bd70528.h +++ b/include/linux/mfd/rohm-bd70528.h @@ -317,7 +317,7 @@ enum { #define BD70528_MASK_RTC_MINUTE 0x7f #define BD70528_MASK_RTC_HOUR_24H 0x80 #define BD70528_MASK_RTC_HOUR_PM 0x20 -#define BD70528_MASK_RTC_HOUR 0x1f +#define BD70528_MASK_RTC_HOUR 0x3f #define BD70528_MASK_RTC_DAY 0x3f #define BD70528_MASK_RTC_WEEK 0x07 #define BD70528_MASK_RTC_MONTH 0x1f diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 0836fe232f97..0cdc8d12785a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1417,14 +1417,15 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_440[0x20]; - u8 tls[0x1]; - u8 reserved_at_461[0x2]; + u8 reserved_at_460[0x3]; u8 log_max_uctx[0x5]; u8 reserved_at_468[0x3]; u8 log_max_umem[0x5]; u8 max_num_eqs[0x10]; - u8 reserved_at_480[0x3]; + u8 reserved_at_480[0x1]; + u8 tls_tx[0x1]; + u8 reserved_at_482[0x1]; u8 log_max_l2_table[0x5]; u8 reserved_at_488[0x8]; u8 log_uar_page_sz[0x10]; diff --git a/include/linux/padata.h b/include/linux/padata.h index 23717eeaad23..cccab7a59787 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -9,6 +9,7 @@ #ifndef PADATA_H #define PADATA_H +#include #include #include #include @@ -98,7 +99,7 @@ struct padata_cpumask { * struct parallel_data - Internal control structure, covers everything * that depends on the cpumask in use. * - * @pinst: padata instance. + * @sh: padata_shell object. * @pqueue: percpu padata queues used for parallelization. * @squeue: percpu padata queues used for serialuzation. * @reorder_objects: Number of objects waiting in the reorder queues. @@ -111,7 +112,7 @@ struct padata_cpumask { * @lock: Reorder lock. */ struct parallel_data { - struct padata_instance *pinst; + struct padata_shell *ps; struct padata_parallel_queue __percpu *pqueue; struct padata_serial_queue __percpu *squeue; atomic_t reorder_objects; @@ -124,14 +125,33 @@ struct parallel_data { spinlock_t lock ____cacheline_aligned; }; +/** + * struct padata_shell - Wrapper around struct parallel_data, its + * purpose is to allow the underlying control structure to be replaced + * on the fly using RCU. + * + * @pinst: padat instance. + * @pd: Actual parallel_data structure which may be substituted on the fly. + * @opd: Pointer to old pd to be freed by padata_replace. + * @list: List entry in padata_instance list. + */ +struct padata_shell { + struct padata_instance *pinst; + struct parallel_data __rcu *pd; + struct parallel_data *opd; + struct list_head list; +}; + /** * struct padata_instance - The overall control structure. * * @cpu_notifier: cpu hotplug notifier. * @parallel_wq: The workqueue used for parallel work. * @serial_wq: The workqueue used for serial work. - * @pd: The internal control structure. + * @pslist: List of padata_shell objects attached to this instance. * @cpumask: User supplied cpumasks for parallel and serial works. + * @rcpumask: Actual cpumasks based on user cpumask and cpu_online_mask. + * @omask: Temporary storage used to compute the notification mask. * @cpumask_change_notifier: Notifiers chain for user-defined notify * callbacks that will be called when either @pcpu or @cbcpu * or both cpumasks change. @@ -143,8 +163,10 @@ struct padata_instance { struct hlist_node node; struct workqueue_struct *parallel_wq; struct workqueue_struct *serial_wq; - struct parallel_data *pd; + struct list_head pslist; struct padata_cpumask cpumask; + struct padata_cpumask rcpumask; + cpumask_var_t omask; struct blocking_notifier_head cpumask_change_notifier; struct kobject kobj; struct mutex lock; @@ -156,7 +178,9 @@ struct padata_instance { extern struct padata_instance *padata_alloc_possible(const char *name); extern void padata_free(struct padata_instance *pinst); -extern int padata_do_parallel(struct padata_instance *pinst, +extern struct padata_shell *padata_alloc_shell(struct padata_instance *pinst); +extern void padata_free_shell(struct padata_shell *ps); +extern int padata_do_parallel(struct padata_shell *ps, struct padata_priv *padata, int *cb_cpu); extern void padata_do_serial(struct padata_priv *padata); extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index a6fabd865211..176bfbd52d97 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -175,8 +175,7 @@ * Declaration/definition used for per-CPU variables that should be accessed * as decrypted when memory encryption is enabled in the guest. */ -#if defined(CONFIG_VIRTUALIZATION) && defined(CONFIG_AMD_MEM_ENCRYPT) - +#ifdef CONFIG_AMD_MEM_ENCRYPT #define DECLARE_PER_CPU_DECRYPTED(type, name) \ DECLARE_PER_CPU_SECTION(type, name, "..decrypted") diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 337a46391527..6a92fd3105a3 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -287,6 +287,8 @@ void regulator_bulk_set_supply_names(struct regulator_bulk_data *consumers, const char *const *supply_names, unsigned int num_supplies); +bool regulator_is_equal(struct regulator *reg1, struct regulator *reg2); + #else /* @@ -593,6 +595,11 @@ regulator_bulk_set_supply_names(struct regulator_bulk_data *consumers, { } +static inline bool +regulator_is_equal(struct regulator *reg1, struct regulator *reg2) +{ + return false; +} #endif static inline int regulator_set_voltage_triplet(struct regulator *regulator, diff --git a/include/media/v4l2-rect.h b/include/media/v4l2-rect.h index c86474dc7b55..8800a640c224 100644 --- a/include/media/v4l2-rect.h +++ b/include/media/v4l2-rect.h @@ -63,10 +63,10 @@ static inline void v4l2_rect_map_inside(struct v4l2_rect *r, r->left = boundary->left; if (r->top < boundary->top) r->top = boundary->top; - if (r->left + r->width > boundary->width) - r->left = boundary->width - r->width; - if (r->top + r->height > boundary->height) - r->top = boundary->height - r->height; + if (r->left + r->width > boundary->left + boundary->width) + r->left = boundary->left + boundary->width - r->width; + if (r->top + r->height > boundary->top + boundary->height) + r->top = boundary->top + boundary->height - r->height; } /** diff --git a/include/net/ipx.h b/include/net/ipx.h index baf090390998..9d1342807b59 100644 --- a/include/net/ipx.h +++ b/include/net/ipx.h @@ -47,11 +47,6 @@ struct ipxhdr { /* From af_ipx.c */ extern int sysctl_ipx_pprop_broadcasting; -static __inline__ struct ipxhdr *ipx_hdr(struct sk_buff *skb) -{ - return (struct ipxhdr *)skb_transport_header(skb); -} - struct ipx_interface { /* IPX address */ __be32 if_netnum; diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h index e05b95e83d5a..fb9dce4c6928 100644 --- a/include/sound/hdaudio.h +++ b/include/sound/hdaudio.h @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -330,6 +331,7 @@ struct hdac_bus { bool chip_init:1; /* h/w initialized */ /* behavior flags */ + bool aligned_mmio:1; /* aligned MMIO access */ bool sync_write:1; /* sync after verb write */ bool use_posbuf:1; /* use position buffer */ bool snoop:1; /* enable snooping */ @@ -405,34 +407,61 @@ void snd_hdac_bus_free_stream_pages(struct hdac_bus *bus); unsigned int snd_hdac_aligned_read(void __iomem *addr, unsigned int mask); void snd_hdac_aligned_write(unsigned int val, void __iomem *addr, unsigned int mask); -#define snd_hdac_reg_writeb(v, addr) snd_hdac_aligned_write(v, addr, 0xff) -#define snd_hdac_reg_writew(v, addr) snd_hdac_aligned_write(v, addr, 0xffff) -#define snd_hdac_reg_readb(addr) snd_hdac_aligned_read(addr, 0xff) -#define snd_hdac_reg_readw(addr) snd_hdac_aligned_read(addr, 0xffff) -#else /* CONFIG_SND_HDA_ALIGNED_MMIO */ -#define snd_hdac_reg_writeb(val, addr) writeb(val, addr) -#define snd_hdac_reg_writew(val, addr) writew(val, addr) -#define snd_hdac_reg_readb(addr) readb(addr) -#define snd_hdac_reg_readw(addr) readw(addr) -#endif /* CONFIG_SND_HDA_ALIGNED_MMIO */ -#define snd_hdac_reg_writel(val, addr) writel(val, addr) -#define snd_hdac_reg_readl(addr) readl(addr) +#define snd_hdac_aligned_mmio(bus) (bus)->aligned_mmio +#else +#define snd_hdac_aligned_mmio(bus) false +#define snd_hdac_aligned_read(addr, mask) 0 +#define snd_hdac_aligned_write(val, addr, mask) do {} while (0) +#endif + +static inline void snd_hdac_reg_writeb(struct hdac_bus *bus, void __iomem *addr, + u8 val) +{ + if (snd_hdac_aligned_mmio(bus)) + snd_hdac_aligned_write(val, addr, 0xff); + else + writeb(val, addr); +} + +static inline void snd_hdac_reg_writew(struct hdac_bus *bus, void __iomem *addr, + u16 val) +{ + if (snd_hdac_aligned_mmio(bus)) + snd_hdac_aligned_write(val, addr, 0xffff); + else + writew(val, addr); +} + +static inline u8 snd_hdac_reg_readb(struct hdac_bus *bus, void __iomem *addr) +{ + return snd_hdac_aligned_mmio(bus) ? + snd_hdac_aligned_read(addr, 0xff) : readb(addr); +} + +static inline u16 snd_hdac_reg_readw(struct hdac_bus *bus, void __iomem *addr) +{ + return snd_hdac_aligned_mmio(bus) ? + snd_hdac_aligned_read(addr, 0xffff) : readw(addr); +} + +#define snd_hdac_reg_writel(bus, addr, val) writel(val, addr) +#define snd_hdac_reg_readl(bus, addr) readl(addr) /* * macros for easy use */ #define _snd_hdac_chip_writeb(chip, reg, value) \ - snd_hdac_reg_writeb(value, (chip)->remap_addr + (reg)) + snd_hdac_reg_writeb(chip, (chip)->remap_addr + (reg), value) #define _snd_hdac_chip_readb(chip, reg) \ - snd_hdac_reg_readb((chip)->remap_addr + (reg)) + snd_hdac_reg_readb(chip, (chip)->remap_addr + (reg)) #define _snd_hdac_chip_writew(chip, reg, value) \ - snd_hdac_reg_writew(value, (chip)->remap_addr + (reg)) + snd_hdac_reg_writew(chip, (chip)->remap_addr + (reg), value) #define _snd_hdac_chip_readw(chip, reg) \ - snd_hdac_reg_readw((chip)->remap_addr + (reg)) + snd_hdac_reg_readw(chip, (chip)->remap_addr + (reg)) #define _snd_hdac_chip_writel(chip, reg, value) \ - snd_hdac_reg_writel(value, (chip)->remap_addr + (reg)) + snd_hdac_reg_writel(chip, (chip)->remap_addr + (reg), value) #define _snd_hdac_chip_readl(chip, reg) \ - snd_hdac_reg_readl((chip)->remap_addr + (reg)) + snd_hdac_reg_readl(chip, (chip)->remap_addr + (reg)) /* read/write a register, pass without AZX_REG_ prefix */ #define snd_hdac_chip_writel(chip, reg, value) \ @@ -540,17 +569,17 @@ int snd_hdac_get_stream_stripe_ctl(struct hdac_bus *bus, */ /* read/write a register, pass without AZX_REG_ prefix */ #define snd_hdac_stream_writel(dev, reg, value) \ - snd_hdac_reg_writel(value, (dev)->sd_addr + AZX_REG_ ## reg) + snd_hdac_reg_writel((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg, value) #define snd_hdac_stream_writew(dev, reg, value) \ - snd_hdac_reg_writew(value, (dev)->sd_addr + AZX_REG_ ## reg) + snd_hdac_reg_writew((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg, value) #define snd_hdac_stream_writeb(dev, reg, value) \ - snd_hdac_reg_writeb(value, (dev)->sd_addr + AZX_REG_ ## reg) + snd_hdac_reg_writeb((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg, value) #define snd_hdac_stream_readl(dev, reg) \ - snd_hdac_reg_readl((dev)->sd_addr + AZX_REG_ ## reg) + snd_hdac_reg_readl((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg) #define snd_hdac_stream_readw(dev, reg) \ - snd_hdac_reg_readw((dev)->sd_addr + AZX_REG_ ## reg) + snd_hdac_reg_readw((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg) #define snd_hdac_stream_readb(dev, reg) \ - snd_hdac_reg_readb((dev)->sd_addr + AZX_REG_ ## reg) + snd_hdac_reg_readb((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg) /* update a register, pass without AZX_REG_ prefix */ #define snd_hdac_stream_updatel(dev, reg, mask, val) \ diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index c2ce6480b4b1..66282552db20 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -67,8 +67,8 @@ DECLARE_EVENT_CLASS(writeback_page_template, TP_fast_assign( strscpy_pad(__entry->name, - mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)", - 32); + bdi_dev_name(mapping ? inode_to_bdi(mapping->host) : + NULL), 32); __entry->ino = mapping ? mapping->host->i_ino : 0; __entry->index = page->index; ), @@ -111,8 +111,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, struct backing_dev_info *bdi = inode_to_bdi(inode); /* may be called for files on pseudo FSes w/ unregistered bdi */ - strscpy_pad(__entry->name, - bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); + strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->flags = flags; @@ -193,7 +192,7 @@ TRACE_EVENT(inode_foreign_history, ), TP_fast_assign( - strncpy(__entry->name, dev_name(inode_to_bdi(inode)->dev), 32); + strncpy(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); __entry->history = history; @@ -222,7 +221,7 @@ TRACE_EVENT(inode_switch_wbs, ), TP_fast_assign( - strncpy(__entry->name, dev_name(old_wb->bdi->dev), 32); + strncpy(__entry->name, bdi_dev_name(old_wb->bdi), 32); __entry->ino = inode->i_ino; __entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb); __entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb); @@ -255,7 +254,7 @@ TRACE_EVENT(track_foreign_dirty, struct address_space *mapping = page_mapping(page); struct inode *inode = mapping ? mapping->host : NULL; - strncpy(__entry->name, dev_name(wb->bdi->dev), 32); + strncpy(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->bdi_id = wb->bdi->id; __entry->ino = inode ? inode->i_ino : 0; __entry->memcg_id = wb->memcg_css->id; @@ -288,7 +287,7 @@ TRACE_EVENT(flush_foreign, ), TP_fast_assign( - strncpy(__entry->name, dev_name(wb->bdi->dev), 32); + strncpy(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); __entry->frn_bdi_id = frn_bdi_id; __entry->frn_memcg_id = frn_memcg_id; @@ -318,7 +317,7 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template, TP_fast_assign( strscpy_pad(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->sync_mode = wbc->sync_mode; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); @@ -361,9 +360,7 @@ DECLARE_EVENT_CLASS(writeback_work_class, __field(unsigned int, cgroup_ino) ), TP_fast_assign( - strscpy_pad(__entry->name, - wb->bdi->dev ? dev_name(wb->bdi->dev) : - "(unknown)", 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->nr_pages = work->nr_pages; __entry->sb_dev = work->sb ? work->sb->s_dev : 0; __entry->sync_mode = work->sync_mode; @@ -416,7 +413,7 @@ DECLARE_EVENT_CLASS(writeback_class, __field(unsigned int, cgroup_ino) ), TP_fast_assign( - strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: cgroup_ino=%u", @@ -438,7 +435,7 @@ TRACE_EVENT(writeback_bdi_register, __array(char, name, 32) ), TP_fast_assign( - strscpy_pad(__entry->name, dev_name(bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); ), TP_printk("bdi %s", __entry->name @@ -463,7 +460,7 @@ DECLARE_EVENT_CLASS(wbc_class, ), TP_fast_assign( - strscpy_pad(__entry->name, dev_name(bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; __entry->sync_mode = wbc->sync_mode; @@ -514,7 +511,7 @@ TRACE_EVENT(writeback_queue_io, ), TP_fast_assign( unsigned long *older_than_this = work->older_than_this; - strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->older = older_than_this ? *older_than_this : 0; __entry->age = older_than_this ? (jiffies - *older_than_this) * 1000 / HZ : -1; @@ -600,7 +597,7 @@ TRACE_EVENT(bdi_dirty_ratelimit, ), TP_fast_assign( - strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); __entry->write_bw = KBps(wb->write_bandwidth); __entry->avg_write_bw = KBps(wb->avg_write_bandwidth); __entry->dirty_rate = KBps(dirty_rate); @@ -665,7 +662,7 @@ TRACE_EVENT(balance_dirty_pages, TP_fast_assign( unsigned long freerun = (thresh + bg_thresh) / 2; - strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); __entry->limit = global_wb_domain.dirty_limit; __entry->setpoint = (global_wb_domain.dirty_limit + @@ -726,7 +723,7 @@ TRACE_EVENT(writeback_sb_inodes_requeue, TP_fast_assign( strscpy_pad(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; @@ -800,7 +797,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, TP_fast_assign( strscpy_pad(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; diff --git a/ipc/msg.c b/ipc/msg.c index 8dec945fa030..767587ab45a3 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -377,7 +377,7 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) * NOTE: no locks must be held, the rwsem is taken inside this function. */ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, - struct msqid64_ds *msqid64) + struct ipc64_perm *perm, int msg_qbytes) { struct kern_ipc_perm *ipcp; struct msg_queue *msq; @@ -387,7 +387,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, rcu_read_lock(); ipcp = ipcctl_obtain_check(ns, &msg_ids(ns), msqid, cmd, - &msqid64->msg_perm, msqid64->msg_qbytes); + perm, msg_qbytes); if (IS_ERR(ipcp)) { err = PTR_ERR(ipcp); goto out_unlock1; @@ -409,18 +409,18 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, { DEFINE_WAKE_Q(wake_q); - if (msqid64->msg_qbytes > ns->msg_ctlmnb && + if (msg_qbytes > ns->msg_ctlmnb && !capable(CAP_SYS_RESOURCE)) { err = -EPERM; goto out_unlock1; } ipc_lock_object(&msq->q_perm); - err = ipc_update_perm(&msqid64->msg_perm, ipcp); + err = ipc_update_perm(perm, ipcp); if (err) goto out_unlock0; - msq->q_qbytes = msqid64->msg_qbytes; + msq->q_qbytes = msg_qbytes; msq->q_ctime = ktime_get_real_seconds(); /* @@ -601,9 +601,10 @@ static long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf, int ver case IPC_SET: if (copy_msqid_from_user(&msqid64, buf, version)) return -EFAULT; - /* fallthru */ + return msgctl_down(ns, msqid, cmd, &msqid64.msg_perm, + msqid64.msg_qbytes); case IPC_RMID: - return msgctl_down(ns, msqid, cmd, &msqid64); + return msgctl_down(ns, msqid, cmd, NULL, 0); default: return -EINVAL; } @@ -735,9 +736,9 @@ static long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr, int versio case IPC_SET: if (copy_compat_msqid_from_user(&msqid64, uptr, version)) return -EFAULT; - /* fallthru */ + return msgctl_down(ns, msqid, cmd, &msqid64.msg_perm, msqid64.msg_qbytes); case IPC_RMID: - return msgctl_down(ns, msqid, cmd, &msqid64); + return msgctl_down(ns, msqid, cmd, NULL, 0); default: return -EINVAL; } diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 3d3d61b5985b..b4b6b77f309c 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -293,7 +293,8 @@ struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key) struct hlist_head *head = dev_map_index_hash(dtab, key); struct bpf_dtab_netdev *dev; - hlist_for_each_entry_rcu(dev, head, index_hlist) + hlist_for_each_entry_rcu(dev, head, index_hlist, + lockdep_is_held(&dtab->index_lock)) if (dev->idx == key) return dev; diff --git a/kernel/events/core.c b/kernel/events/core.c index 6c829e22bad3..15b123bdcaf5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5823,7 +5823,15 @@ accounting: */ user_lock_limit *= num_online_cpus(); - user_locked = atomic_long_read(&user->locked_vm) + user_extra; + user_locked = atomic_long_read(&user->locked_vm); + + /* + * sysctl_perf_event_mlock may have changed, so that + * user->locked_vm > user_lock_limit + */ + if (user_locked > user_lock_limit) + user_locked = user_lock_limit; + user_locked += user_extra; if (user_locked <= user_lock_limit) { /* charge all to locked_vm */ diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index c1eccd4f6520..a949bd39e343 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -114,6 +114,7 @@ static const struct irq_bit_descr irqdata_states[] = { BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED), BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), BIT_MASK_DESCR(IRQD_CAN_RESERVE), + BIT_MASK_DESCR(IRQD_MSI_NOMASK_QUIRK), BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index dd822fd8a7d5..480df3659720 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1459,6 +1459,7 @@ int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg) if (rv) { /* Restore the original irq_data. */ *root_irq_data = *child_irq_data; + kfree(child_irq_data); goto error; } diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index ad26fbcfbfc8..eb95f6106a1e 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -453,8 +453,11 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, continue; irq_data = irq_domain_get_irq_data(domain, desc->irq); - if (!can_reserve) + if (!can_reserve) { irqd_clr_can_reserve(irq_data); + if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK) + irqd_set_msi_nomask_quirk(irq_data); + } ret = irq_domain_activate_irq(irq_data, can_reserve); if (ret) goto cleanup; diff --git a/kernel/padata.c b/kernel/padata.c index c3fec1413295..9c82ee4a9732 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -35,6 +35,8 @@ #define MAX_OBJ_NUM 1000 +static void padata_free_pd(struct parallel_data *pd); + static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) { int cpu, target_cpu; @@ -87,7 +89,7 @@ static void padata_parallel_worker(struct work_struct *parallel_work) /** * padata_do_parallel - padata parallelization function * - * @pinst: padata instance + * @ps: padatashell * @padata: object to be parallelized * @cb_cpu: pointer to the CPU that the serialization callback function should * run on. If it's not in the serial cpumask of @pinst @@ -98,16 +100,17 @@ static void padata_parallel_worker(struct work_struct *parallel_work) * Note: Every object which is parallelized by padata_do_parallel * must be seen by padata_do_serial. */ -int padata_do_parallel(struct padata_instance *pinst, +int padata_do_parallel(struct padata_shell *ps, struct padata_priv *padata, int *cb_cpu) { + struct padata_instance *pinst = ps->pinst; int i, cpu, cpu_index, target_cpu, err; struct padata_parallel_queue *queue; struct parallel_data *pd; rcu_read_lock_bh(); - pd = rcu_dereference_bh(pinst->pd); + pd = rcu_dereference_bh(ps->pd); err = -EINVAL; if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID) @@ -210,10 +213,10 @@ static struct padata_priv *padata_find_next(struct parallel_data *pd, static void padata_reorder(struct parallel_data *pd) { + struct padata_instance *pinst = pd->ps->pinst; int cb_cpu; struct padata_priv *padata; struct padata_serial_queue *squeue; - struct padata_instance *pinst = pd->pinst; struct padata_parallel_queue *next_queue; /* @@ -283,6 +286,7 @@ static void padata_serial_worker(struct work_struct *serial_work) struct padata_serial_queue *squeue; struct parallel_data *pd; LIST_HEAD(local_list); + int cnt; local_bh_disable(); squeue = container_of(serial_work, struct padata_serial_queue, work); @@ -292,6 +296,8 @@ static void padata_serial_worker(struct work_struct *serial_work) list_replace_init(&squeue->serial.list, &local_list); spin_unlock(&squeue->serial.lock); + cnt = 0; + while (!list_empty(&local_list)) { struct padata_priv *padata; @@ -301,9 +307,12 @@ static void padata_serial_worker(struct work_struct *serial_work) list_del_init(&padata->list); padata->serial(padata); - atomic_dec(&pd->refcnt); + cnt++; } local_bh_enable(); + + if (atomic_sub_and_test(cnt, &pd->refcnt)) + padata_free_pd(pd); } /** @@ -341,36 +350,39 @@ void padata_do_serial(struct padata_priv *padata) } EXPORT_SYMBOL(padata_do_serial); -static int padata_setup_cpumasks(struct parallel_data *pd, - const struct cpumask *pcpumask, - const struct cpumask *cbcpumask) +static int padata_setup_cpumasks(struct padata_instance *pinst) { struct workqueue_attrs *attrs; + int err; + + attrs = alloc_workqueue_attrs(); + if (!attrs) + return -ENOMEM; + + /* Restrict parallel_wq workers to pd->cpumask.pcpu. */ + cpumask_copy(attrs->cpumask, pinst->cpumask.pcpu); + err = apply_workqueue_attrs(pinst->parallel_wq, attrs); + free_workqueue_attrs(attrs); + + return err; +} + +static int pd_setup_cpumasks(struct parallel_data *pd, + const struct cpumask *pcpumask, + const struct cpumask *cbcpumask) +{ int err = -ENOMEM; if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL)) goto out; - cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask); - if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) goto free_pcpu_mask; - cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask); - - attrs = alloc_workqueue_attrs(); - if (!attrs) - goto free_cbcpu_mask; - /* Restrict parallel_wq workers to pd->cpumask.pcpu. */ - cpumask_copy(attrs->cpumask, pd->cpumask.pcpu); - err = apply_workqueue_attrs(pd->pinst->parallel_wq, attrs); - free_workqueue_attrs(attrs); - if (err < 0) - goto free_cbcpu_mask; + cpumask_copy(pd->cpumask.pcpu, pcpumask); + cpumask_copy(pd->cpumask.cbcpu, cbcpumask); return 0; -free_cbcpu_mask: - free_cpumask_var(pd->cpumask.cbcpu); free_pcpu_mask: free_cpumask_var(pd->cpumask.pcpu); out: @@ -414,12 +426,16 @@ static void padata_init_pqueues(struct parallel_data *pd) } /* Allocate and initialize the internal cpumask dependend resources. */ -static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, - const struct cpumask *pcpumask, - const struct cpumask *cbcpumask) +static struct parallel_data *padata_alloc_pd(struct padata_shell *ps) { + struct padata_instance *pinst = ps->pinst; + const struct cpumask *cbcpumask; + const struct cpumask *pcpumask; struct parallel_data *pd; + cbcpumask = pinst->rcpumask.cbcpu; + pcpumask = pinst->rcpumask.pcpu; + pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL); if (!pd) goto err; @@ -432,15 +448,15 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, if (!pd->squeue) goto err_free_pqueue; - pd->pinst = pinst; - if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0) + pd->ps = ps; + if (pd_setup_cpumasks(pd, pcpumask, cbcpumask)) goto err_free_squeue; padata_init_pqueues(pd); padata_init_squeues(pd); atomic_set(&pd->seq_nr, -1); atomic_set(&pd->reorder_objects, 0); - atomic_set(&pd->refcnt, 0); + atomic_set(&pd->refcnt, 1); spin_lock_init(&pd->lock); pd->cpu = cpumask_first(pd->cpumask.pcpu); INIT_WORK(&pd->reorder_work, invoke_padata_reorder); @@ -466,29 +482,6 @@ static void padata_free_pd(struct parallel_data *pd) kfree(pd); } -/* Flush all objects out of the padata queues. */ -static void padata_flush_queues(struct parallel_data *pd) -{ - int cpu; - struct padata_parallel_queue *pqueue; - struct padata_serial_queue *squeue; - - for_each_cpu(cpu, pd->cpumask.pcpu) { - pqueue = per_cpu_ptr(pd->pqueue, cpu); - flush_work(&pqueue->work); - } - - if (atomic_read(&pd->reorder_objects)) - padata_reorder(pd); - - for_each_cpu(cpu, pd->cpumask.cbcpu) { - squeue = per_cpu_ptr(pd->squeue, cpu); - flush_work(&squeue->work); - } - - BUG_ON(atomic_read(&pd->refcnt) != 0); -} - static void __padata_start(struct padata_instance *pinst) { pinst->flags |= PADATA_INIT; @@ -502,39 +495,67 @@ static void __padata_stop(struct padata_instance *pinst) pinst->flags &= ~PADATA_INIT; synchronize_rcu(); - - get_online_cpus(); - padata_flush_queues(pinst->pd); - put_online_cpus(); } /* Replace the internal control structure with a new one. */ -static void padata_replace(struct padata_instance *pinst, - struct parallel_data *pd_new) +static int padata_replace_one(struct padata_shell *ps) { - struct parallel_data *pd_old = pinst->pd; - int notification_mask = 0; + struct parallel_data *pd_new; - pinst->flags |= PADATA_RESET; + pd_new = padata_alloc_pd(ps); + if (!pd_new) + return -ENOMEM; - rcu_assign_pointer(pinst->pd, pd_new); + ps->opd = rcu_dereference_protected(ps->pd, 1); + rcu_assign_pointer(ps->pd, pd_new); - synchronize_rcu(); + return 0; +} + +static int padata_replace(struct padata_instance *pinst, int cpu) +{ + int notification_mask = 0; + struct padata_shell *ps; + int err; + + pinst->flags |= PADATA_RESET; - if (!cpumask_equal(pd_old->cpumask.pcpu, pd_new->cpumask.pcpu)) + cpumask_copy(pinst->omask, pinst->rcpumask.pcpu); + cpumask_and(pinst->rcpumask.pcpu, pinst->cpumask.pcpu, + cpu_online_mask); + if (cpu >= 0) + cpumask_clear_cpu(cpu, pinst->rcpumask.pcpu); + if (!cpumask_equal(pinst->omask, pinst->rcpumask.pcpu)) notification_mask |= PADATA_CPU_PARALLEL; - if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu)) + + cpumask_copy(pinst->omask, pinst->rcpumask.cbcpu); + cpumask_and(pinst->rcpumask.cbcpu, pinst->cpumask.cbcpu, + cpu_online_mask); + if (cpu >= 0) + cpumask_clear_cpu(cpu, pinst->rcpumask.cbcpu); + if (!cpumask_equal(pinst->omask, pinst->rcpumask.cbcpu)) notification_mask |= PADATA_CPU_SERIAL; - padata_flush_queues(pd_old); - padata_free_pd(pd_old); + list_for_each_entry(ps, &pinst->pslist, list) { + err = padata_replace_one(ps); + if (err) + break; + } + + synchronize_rcu(); + + list_for_each_entry_continue_reverse(ps, &pinst->pslist, list) + if (atomic_dec_and_test(&ps->opd->refcnt)) + padata_free_pd(ps->opd); if (notification_mask) blocking_notifier_call_chain(&pinst->cpumask_change_notifier, notification_mask, - &pd_new->cpumask); + &pinst->cpumask); pinst->flags &= ~PADATA_RESET; + + return err; } /** @@ -587,7 +608,7 @@ static int __padata_set_cpumasks(struct padata_instance *pinst, cpumask_var_t cbcpumask) { int valid; - struct parallel_data *pd; + int err; valid = padata_validate_cpumask(pinst, pcpumask); if (!valid) { @@ -600,19 +621,15 @@ static int __padata_set_cpumasks(struct padata_instance *pinst, __padata_stop(pinst); out_replace: - pd = padata_alloc_pd(pinst, pcpumask, cbcpumask); - if (!pd) - return -ENOMEM; - cpumask_copy(pinst->cpumask.pcpu, pcpumask); cpumask_copy(pinst->cpumask.cbcpu, cbcpumask); - padata_replace(pinst, pd); + err = padata_setup_cpumasks(pinst) ?: padata_replace(pinst, -1); if (valid) __padata_start(pinst); - return 0; + return err; } /** @@ -695,46 +712,32 @@ EXPORT_SYMBOL(padata_stop); static int __padata_add_cpu(struct padata_instance *pinst, int cpu) { - struct parallel_data *pd; + int err = 0; if (cpumask_test_cpu(cpu, cpu_online_mask)) { - pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu, - pinst->cpumask.cbcpu); - if (!pd) - return -ENOMEM; - - padata_replace(pinst, pd); + err = padata_replace(pinst, -1); if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) && padata_validate_cpumask(pinst, pinst->cpumask.cbcpu)) __padata_start(pinst); } - return 0; + return err; } static int __padata_remove_cpu(struct padata_instance *pinst, int cpu) { - struct parallel_data *pd = NULL; + int err = 0; if (cpumask_test_cpu(cpu, cpu_online_mask)) { - if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) || !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu)) __padata_stop(pinst); - pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu, - pinst->cpumask.cbcpu); - if (!pd) - return -ENOMEM; - - padata_replace(pinst, pd); - - cpumask_clear_cpu(cpu, pd->cpumask.cbcpu); - cpumask_clear_cpu(cpu, pd->cpumask.pcpu); + err = padata_replace(pinst, cpu); } - return 0; + return err; } /** @@ -817,8 +820,12 @@ static void __padata_free(struct padata_instance *pinst) cpuhp_state_remove_instance_nocalls(hp_online, &pinst->node); #endif + WARN_ON(!list_empty(&pinst->pslist)); + padata_stop(pinst); - padata_free_pd(pinst->pd); + free_cpumask_var(pinst->omask); + free_cpumask_var(pinst->rcpumask.cbcpu); + free_cpumask_var(pinst->rcpumask.pcpu); free_cpumask_var(pinst->cpumask.pcpu); free_cpumask_var(pinst->cpumask.cbcpu); destroy_workqueue(pinst->serial_wq); @@ -965,7 +972,6 @@ static struct padata_instance *padata_alloc(const char *name, const struct cpumask *cbcpumask) { struct padata_instance *pinst; - struct parallel_data *pd = NULL; pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL); if (!pinst) @@ -993,14 +999,22 @@ static struct padata_instance *padata_alloc(const char *name, !padata_validate_cpumask(pinst, cbcpumask)) goto err_free_masks; - pd = padata_alloc_pd(pinst, pcpumask, cbcpumask); - if (!pd) + if (!alloc_cpumask_var(&pinst->rcpumask.pcpu, GFP_KERNEL)) goto err_free_masks; + if (!alloc_cpumask_var(&pinst->rcpumask.cbcpu, GFP_KERNEL)) + goto err_free_rcpumask_pcpu; + if (!alloc_cpumask_var(&pinst->omask, GFP_KERNEL)) + goto err_free_rcpumask_cbcpu; - rcu_assign_pointer(pinst->pd, pd); + INIT_LIST_HEAD(&pinst->pslist); cpumask_copy(pinst->cpumask.pcpu, pcpumask); cpumask_copy(pinst->cpumask.cbcpu, cbcpumask); + cpumask_and(pinst->rcpumask.pcpu, pcpumask, cpu_online_mask); + cpumask_and(pinst->rcpumask.cbcpu, cbcpumask, cpu_online_mask); + + if (padata_setup_cpumasks(pinst)) + goto err_free_omask; pinst->flags = 0; @@ -1016,6 +1030,12 @@ static struct padata_instance *padata_alloc(const char *name, return pinst; +err_free_omask: + free_cpumask_var(pinst->omask); +err_free_rcpumask_cbcpu: + free_cpumask_var(pinst->rcpumask.cbcpu); +err_free_rcpumask_pcpu: + free_cpumask_var(pinst->rcpumask.pcpu); err_free_masks: free_cpumask_var(pinst->cpumask.pcpu); free_cpumask_var(pinst->cpumask.cbcpu); @@ -1054,6 +1074,61 @@ void padata_free(struct padata_instance *pinst) } EXPORT_SYMBOL(padata_free); +/** + * padata_alloc_shell - Allocate and initialize padata shell. + * + * @pinst: Parent padata_instance object. + */ +struct padata_shell *padata_alloc_shell(struct padata_instance *pinst) +{ + struct parallel_data *pd; + struct padata_shell *ps; + + ps = kzalloc(sizeof(*ps), GFP_KERNEL); + if (!ps) + goto out; + + ps->pinst = pinst; + + get_online_cpus(); + pd = padata_alloc_pd(ps); + put_online_cpus(); + + if (!pd) + goto out_free_ps; + + mutex_lock(&pinst->lock); + RCU_INIT_POINTER(ps->pd, pd); + list_add(&ps->list, &pinst->pslist); + mutex_unlock(&pinst->lock); + + return ps; + +out_free_ps: + kfree(ps); +out: + return NULL; +} +EXPORT_SYMBOL(padata_alloc_shell); + +/** + * padata_free_shell - free a padata shell + * + * @ps: padata shell to free + */ +void padata_free_shell(struct padata_shell *ps) +{ + struct padata_instance *pinst = ps->pinst; + + mutex_lock(&pinst->lock); + list_del(&ps->list); + padata_free_pd(rcu_dereference_protected(ps->pd, 1)); + mutex_unlock(&pinst->lock); + + kfree(ps); +} +EXPORT_SYMBOL(padata_free_shell); + #ifdef CONFIG_HOTPLUG_CPU static __init int padata_driver_init(void) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 5dffade2d7cd..21acdff3bd27 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -530,7 +530,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) idx = rcu_seq_state(ssp->srcu_gp_seq); WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); cbdelay = srcu_get_delay(ssp); - ssp->srcu_last_gp_end = ktime_get_mono_fast_ns(); + WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns()); rcu_seq_end(&ssp->srcu_gp_seq); gpseq = rcu_seq_current(&ssp->srcu_gp_seq); if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq)) @@ -762,6 +762,7 @@ static bool srcu_might_be_idle(struct srcu_struct *ssp) unsigned long flags; struct srcu_data *sdp; unsigned long t; + unsigned long tlast; /* If the local srcu_data structure has callbacks, not idle. */ local_irq_save(flags); @@ -780,9 +781,9 @@ static bool srcu_might_be_idle(struct srcu_struct *ssp) /* First, see if enough time has passed since the last GP. */ t = ktime_get_mono_fast_ns(); + tlast = READ_ONCE(ssp->srcu_last_gp_end); if (exp_holdoff == 0 || - time_in_range_open(t, ssp->srcu_last_gp_end, - ssp->srcu_last_gp_end + exp_holdoff)) + time_in_range_open(t, tlast, tlast + exp_holdoff)) return false; /* Too soon after last GP. */ /* Next, check for probable idleness. */ diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index d632cd019597..69c5aa64fcfd 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -134,7 +134,7 @@ static void __maybe_unused sync_exp_reset_tree(void) rcu_for_each_node_breadth_first(rnp) { raw_spin_lock_irqsave_rcu_node(rnp, flags); WARN_ON_ONCE(rnp->expmask); - rnp->expmask = rnp->expmaskinit; + WRITE_ONCE(rnp->expmask, rnp->expmaskinit); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } } @@ -211,7 +211,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp, rnp = rnp->parent; raw_spin_lock_rcu_node(rnp); /* irqs already disabled */ WARN_ON_ONCE(!(rnp->expmask & mask)); - rnp->expmask &= ~mask; + WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask); } } @@ -241,7 +241,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp, raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return; } - rnp->expmask &= ~mask; + WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask); __rcu_report_exp_rnp(rnp, wake, flags); /* Releases rnp->lock. */ } @@ -372,12 +372,10 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); /* IPI the remaining CPUs for expedited quiescent state. */ - for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) { + for_each_leaf_node_cpu_mask(rnp, cpu, mask_ofl_ipi) { unsigned long mask = leaf_node_cpu_bit(rnp, cpu); struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); - if (!(mask_ofl_ipi & mask)) - continue; retry_ipi: if (rcu_dynticks_in_eqs_since(rdp, rdp->exp_dynticks_snap)) { mask_ofl_test |= mask; @@ -491,7 +489,7 @@ static void synchronize_sched_expedited_wait(void) struct rcu_data *rdp; mask = leaf_node_cpu_bit(rnp, cpu); - if (!(rnp->expmask & mask)) + if (!(READ_ONCE(rnp->expmask) & mask)) continue; ndetected++; rdp = per_cpu_ptr(&rcu_data, cpu); @@ -503,7 +501,8 @@ static void synchronize_sched_expedited_wait(void) } pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n", jiffies - jiffies_start, rcu_state.expedited_sequence, - rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]); + READ_ONCE(rnp_root->expmask), + ".T"[!!rnp_root->exp_tasks]); if (ndetected) { pr_err("blocking rcu_node structures:"); rcu_for_each_node_breadth_first(rnp) { @@ -513,7 +512,7 @@ static void synchronize_sched_expedited_wait(void) continue; pr_cont(" l=%u:%d-%d:%#lx/%c", rnp->level, rnp->grplo, rnp->grphi, - rnp->expmask, + READ_ONCE(rnp->expmask), ".T"[!!rnp->exp_tasks]); } pr_cont("\n"); @@ -521,7 +520,7 @@ static void synchronize_sched_expedited_wait(void) rcu_for_each_leaf_node(rnp) { for_each_leaf_node_possible_cpu(rnp, cpu) { mask = leaf_node_cpu_bit(rnp, cpu); - if (!(rnp->expmask & mask)) + if (!(READ_ONCE(rnp->expmask) & mask)) continue; dump_cpu_task(cpu); } diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index fa08d55f7040..f849e7429816 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -220,7 +220,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) * blocked tasks. */ if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) { - rnp->gp_tasks = &t->rcu_node_entry; + WRITE_ONCE(rnp->gp_tasks, &t->rcu_node_entry); WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq); } if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD)) @@ -340,7 +340,7 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch); */ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) { - return rnp->gp_tasks != NULL; + return READ_ONCE(rnp->gp_tasks) != NULL; } /* Bias and limit values for ->rcu_read_lock_nesting. */ @@ -493,7 +493,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) trace_rcu_unlock_preempted_task(TPS("rcu_preempt"), rnp->gp_seq, t->pid); if (&t->rcu_node_entry == rnp->gp_tasks) - rnp->gp_tasks = np; + WRITE_ONCE(rnp->gp_tasks, np); if (&t->rcu_node_entry == rnp->exp_tasks) rnp->exp_tasks = np; if (IS_ENABLED(CONFIG_RCU_BOOST)) { @@ -612,7 +612,7 @@ static void rcu_read_unlock_special(struct task_struct *t) t->rcu_read_unlock_special.b.exp_hint = false; exp = (t->rcu_blocked_node && t->rcu_blocked_node->exp_tasks) || - (rdp->grpmask & rnp->expmask) || + (rdp->grpmask & READ_ONCE(rnp->expmask)) || tick_nohz_full_cpu(rdp->cpu); // Need to defer quiescent state until everything is enabled. if (irqs_were_disabled && use_softirq && @@ -663,7 +663,7 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) dump_blkd_tasks(rnp, 10); if (rcu_preempt_has_tasks(rnp) && (rnp->qsmaskinit || rnp->wait_blkd_tasks)) { - rnp->gp_tasks = rnp->blkd_tasks.next; + WRITE_ONCE(rnp->gp_tasks, rnp->blkd_tasks.next); t = container_of(rnp->gp_tasks, struct task_struct, rcu_node_entry); trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"), @@ -757,7 +757,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n", __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext); pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n", - __func__, rnp->gp_tasks, rnp->boost_tasks, rnp->exp_tasks); + __func__, READ_ONCE(rnp->gp_tasks), rnp->boost_tasks, + rnp->exp_tasks); pr_info("%s: ->blkd_tasks", __func__); i = 0; list_for_each(lhp, &rnp->blkd_tasks) { diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 451f9d05ccfe..4b11f0309eee 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -88,6 +88,7 @@ static int alarmtimer_rtc_add_device(struct device *dev, unsigned long flags; struct rtc_device *rtc = to_rtc_device(dev); struct wakeup_source *__ws; + int ret = 0; if (rtcdev) return -EBUSY; @@ -102,8 +103,8 @@ static int alarmtimer_rtc_add_device(struct device *dev, spin_lock_irqsave(&rtcdev_lock, flags); if (!rtcdev) { if (!try_module_get(rtc->owner)) { - spin_unlock_irqrestore(&rtcdev_lock, flags); - return -1; + ret = -1; + goto unlock; } rtcdev = rtc; @@ -112,11 +113,12 @@ static int alarmtimer_rtc_add_device(struct device *dev, ws = __ws; __ws = NULL; } +unlock: spin_unlock_irqrestore(&rtcdev_lock, flags); wakeup_source_unregister(__ws); - return 0; + return ret; } static inline void alarmtimer_rtc_timer_init(void) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index fff5f64981c6..428beb69426a 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -293,8 +293,15 @@ static void clocksource_watchdog(struct timer_list *unused) next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); if (next_cpu >= nr_cpu_ids) next_cpu = cpumask_first(cpu_online_mask); - watchdog_timer.expires += WATCHDOG_INTERVAL; - add_timer_on(&watchdog_timer, next_cpu); + + /* + * Arm timer if not already pending: could race with concurrent + * pair clocksource_stop_watchdog() clocksource_start_watchdog(). + */ + if (!timer_pending(&watchdog_timer)) { + watchdog_timer.expires += WATCHDOG_INTERVAL; + add_timer_on(&watchdog_timer, next_cpu); + } out: spin_unlock(&watchdog_lock); } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0708a41cfe2d..407d8bf4ed93 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5102,8 +5102,8 @@ static const struct file_operations ftrace_notrace_fops = { static DEFINE_MUTEX(graph_lock); -struct ftrace_hash *ftrace_graph_hash = EMPTY_HASH; -struct ftrace_hash *ftrace_graph_notrace_hash = EMPTY_HASH; +struct ftrace_hash __rcu *ftrace_graph_hash = EMPTY_HASH; +struct ftrace_hash __rcu *ftrace_graph_notrace_hash = EMPTY_HASH; enum graph_filter_type { GRAPH_FILTER_NOTRACE = 0, @@ -5378,8 +5378,15 @@ ftrace_graph_release(struct inode *inode, struct file *file) mutex_unlock(&graph_lock); - /* Wait till all users are no longer using the old hash */ - synchronize_rcu(); + /* + * We need to do a hard force of sched synchronization. + * This is because we use preempt_disable() to do RCU, but + * the function tracers can be called where RCU is not watching + * (like before user_exit()). We can not rely on the RCU + * infrastructure to do the synchronization, thus we must do it + * ourselves. + */ + schedule_on_each_cpu(ftrace_sync); free_ftrace_hash(old_hash); } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d685c61085c0..a3c29d5fcc61 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -932,22 +932,31 @@ extern void __trace_graph_return(struct trace_array *tr, unsigned long flags, int pc); #ifdef CONFIG_DYNAMIC_FTRACE -extern struct ftrace_hash *ftrace_graph_hash; -extern struct ftrace_hash *ftrace_graph_notrace_hash; +extern struct ftrace_hash __rcu *ftrace_graph_hash; +extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash; static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { unsigned long addr = trace->func; int ret = 0; + struct ftrace_hash *hash; preempt_disable_notrace(); - if (ftrace_hash_empty(ftrace_graph_hash)) { + /* + * Have to open code "rcu_dereference_sched()" because the + * function graph tracer can be called when RCU is not + * "watching". + * Protected with schedule_on_each_cpu(ftrace_sync) + */ + hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible()); + + if (ftrace_hash_empty(hash)) { ret = 1; goto out; } - if (ftrace_lookup_ip(ftrace_graph_hash, addr)) { + if (ftrace_lookup_ip(hash, addr)) { /* * This needs to be cleared on the return functions @@ -983,10 +992,20 @@ static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) static inline int ftrace_graph_notrace_addr(unsigned long addr) { int ret = 0; + struct ftrace_hash *notrace_hash; preempt_disable_notrace(); - if (ftrace_lookup_ip(ftrace_graph_notrace_hash, addr)) + /* + * Have to open code "rcu_dereference_sched()" because the + * function graph tracer can be called when RCU is not + * "watching". + * Protected with schedule_on_each_cpu(ftrace_sync) + */ + notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, + !preemptible()); + + if (ftrace_lookup_ip(notrace_hash, addr)) ret = 1; preempt_enable_notrace(); diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 205692181e7b..4be7fc84d6b6 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -470,11 +470,12 @@ struct action_data { * When a histogram trigger is hit, the values of any * references to variables, including variables being passed * as parameters to synthetic events, are collected into a - * var_ref_vals array. This var_ref_idx is the index of the - * first param in the array to be passed to the synthetic - * event invocation. + * var_ref_vals array. This var_ref_idx array is an array of + * indices into the var_ref_vals array, one for each synthetic + * event param, and is passed to the synthetic event + * invocation. */ - unsigned int var_ref_idx; + unsigned int var_ref_idx[TRACING_MAP_VARS_MAX]; struct synth_event *synth_event; bool use_trace_keyword; char *synth_event_name; @@ -875,14 +876,14 @@ static struct trace_event_functions synth_event_funcs = { static notrace void trace_event_raw_event_synth(void *__data, u64 *var_ref_vals, - unsigned int var_ref_idx) + unsigned int *var_ref_idx) { struct trace_event_file *trace_file = __data; struct synth_trace_event *entry; struct trace_event_buffer fbuffer; struct ring_buffer *buffer; struct synth_event *event; - unsigned int i, n_u64; + unsigned int i, n_u64, val_idx; int fields_size = 0; event = trace_file->event_call->data; @@ -905,15 +906,16 @@ static notrace void trace_event_raw_event_synth(void *__data, goto out; for (i = 0, n_u64 = 0; i < event->n_fields; i++) { + val_idx = var_ref_idx[i]; if (event->fields[i]->is_string) { - char *str_val = (char *)(long)var_ref_vals[var_ref_idx + i]; + char *str_val = (char *)(long)var_ref_vals[val_idx]; char *str_field = (char *)&entry->fields[n_u64]; strscpy(str_field, str_val, STR_VAR_LEN_MAX); n_u64 += STR_VAR_LEN_MAX / sizeof(u64); } else { struct synth_field *field = event->fields[i]; - u64 val = var_ref_vals[var_ref_idx + i]; + u64 val = var_ref_vals[val_idx]; switch (field->size) { case 1: @@ -1113,10 +1115,10 @@ static struct tracepoint *alloc_synth_tracepoint(char *name) } typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals, - unsigned int var_ref_idx); + unsigned int *var_ref_idx); static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals, - unsigned int var_ref_idx) + unsigned int *var_ref_idx) { struct tracepoint *tp = event->tp; @@ -2655,6 +2657,22 @@ static int init_var_ref(struct hist_field *ref_field, goto out; } +static int find_var_ref_idx(struct hist_trigger_data *hist_data, + struct hist_field *var_field) +{ + struct hist_field *ref_field; + int i; + + for (i = 0; i < hist_data->n_var_refs; i++) { + ref_field = hist_data->var_refs[i]; + if (ref_field->var.idx == var_field->var.idx && + ref_field->var.hist_data == var_field->hist_data) + return i; + } + + return -ENOENT; +} + /** * create_var_ref - Create a variable reference and attach it to trigger * @hist_data: The trigger that will be referencing the variable @@ -4228,11 +4246,11 @@ static int trace_action_create(struct hist_trigger_data *hist_data, struct trace_array *tr = hist_data->event_file->tr; char *event_name, *param, *system = NULL; struct hist_field *hist_field, *var_ref; - unsigned int i, var_ref_idx; + unsigned int i; unsigned int field_pos = 0; struct synth_event *event; char *synth_event_name; - int ret = 0; + int var_ref_idx, ret = 0; lockdep_assert_held(&event_mutex); @@ -4249,8 +4267,6 @@ static int trace_action_create(struct hist_trigger_data *hist_data, event->ref++; - var_ref_idx = hist_data->n_var_refs; - for (i = 0; i < data->n_params; i++) { char *p; @@ -4299,6 +4315,14 @@ static int trace_action_create(struct hist_trigger_data *hist_data, goto err; } + var_ref_idx = find_var_ref_idx(hist_data, var_ref); + if (WARN_ON(var_ref_idx < 0)) { + ret = var_ref_idx; + goto err; + } + + data->var_ref_idx[i] = var_ref_idx; + field_pos++; kfree(p); continue; @@ -4317,7 +4341,6 @@ static int trace_action_create(struct hist_trigger_data *hist_data, } data->synth_event = event; - data->var_ref_idx = var_ref_idx; out: return ret; err: diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 9ae87be422f2..ab8b6436d53f 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -876,7 +876,8 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, for (i = 0; i < tp->nr_args; i++) { parg = tp->args + i; if (parg->count) { - if (strcmp(parg->type->name, "string") == 0) + if ((strcmp(parg->type->name, "string") == 0) || + (strcmp(parg->type->name, "ustring") == 0)) fmt = ", __get_str(%s[%d])"; else fmt = ", REC->%s[%d]"; @@ -884,7 +885,8 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, pos += snprintf(buf + pos, LEN_OR_ZERO, fmt, parg->name, j); } else { - if (strcmp(parg->type->name, "string") == 0) + if ((strcmp(parg->type->name, "string") == 0) || + (strcmp(parg->type->name, "ustring") == 0)) fmt = ", __get_str(%s)"; else fmt = ", REC->%s"; diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index e288168661e1..e304196d7c28 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -89,8 +89,10 @@ static void tracing_sched_unregister(void) static void tracing_start_sched_switch(int ops) { - bool sched_register = (!sched_cmdline_ref && !sched_tgid_ref); + bool sched_register; + mutex_lock(&sched_register_mutex); + sched_register = (!sched_cmdline_ref && !sched_tgid_ref); switch (ops) { case RECORD_CMDLINE: diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 49cc4d570a40..bd3d9ef7d39e 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -157,6 +157,7 @@ static noinline void __init kmalloc_oob_krealloc_more(void) if (!ptr1 || !ptr2) { pr_err("Allocation failed\n"); kfree(ptr1); + kfree(ptr2); return; } diff --git a/mm/backing-dev.c b/mm/backing-dev.c index c360f6a6c844..62f05f605fb5 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -21,6 +21,7 @@ struct backing_dev_info noop_backing_dev_info = { EXPORT_SYMBOL_GPL(noop_backing_dev_info); static struct class *bdi_class; +const char *bdi_unknown_name = "(unknown)"; /* * bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ef4e9eb572a4..b5b4e310fe70 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5465,14 +5465,6 @@ static int mem_cgroup_move_account(struct page *page, __mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages); } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (compound && !list_empty(page_deferred_list(page))) { - spin_lock(&from->deferred_split_queue.split_queue_lock); - list_del_init(page_deferred_list(page)); - from->deferred_split_queue.split_queue_len--; - spin_unlock(&from->deferred_split_queue.split_queue_lock); - } -#endif /* * It is safe to change page->mem_cgroup here because the page * is referenced, charged, and isolated - we can't race with @@ -5482,16 +5474,6 @@ static int mem_cgroup_move_account(struct page *page, /* caller should have done css_get */ page->mem_cgroup = to; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (compound && list_empty(page_deferred_list(page))) { - spin_lock(&to->deferred_split_queue.split_queue_lock); - list_add_tail(page_deferred_list(page), - &to->deferred_split_queue.split_queue); - to->deferred_split_queue.split_queue_len++; - spin_unlock(&to->deferred_split_queue.split_queue_lock); - } -#endif - spin_unlock_irqrestore(&from->move_lock, flags); ret = 0; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fab540685279..0aa154be3a52 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1738,8 +1738,6 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) BUG_ON(check_hotplug_memory_range(start, size)); - mem_hotplug_begin(); - /* * All memory blocks must be offlined before removing memory. Check * whether all memory blocks in question are offline and return error @@ -1754,9 +1752,14 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) memblock_free(start, size); memblock_remove(start, size); - /* remove memory block devices before removing memory */ + /* + * Memory block device removal under the device_hotplug_lock is + * a barrier against racing online attempts. + */ remove_memory_block_devices(start, size); + mem_hotplug_begin(); + arch_remove_memory(nid, start, size, NULL); __release_memory_resource(start, size); diff --git a/mm/migrate.c b/mm/migrate.c index 6956627ebf8b..c4c313e47f12 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1631,8 +1631,19 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, start = i; } else if (node != current_node) { err = do_move_pages_to_node(mm, &pagelist, current_node); - if (err) + if (err) { + /* + * Positive err means the number of failed + * pages to migrate. Since we are going to + * abort and return the number of non-migrated + * pages, so need to incude the rest of the + * nr_pages that have not been attempted as + * well. + */ + if (err > 0) + err += nr_pages - i - 1; goto out; + } err = store_status(status, start, current_node, i - start); if (err) goto out; @@ -1663,8 +1674,11 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, goto out_flush; err = do_move_pages_to_node(mm, &pagelist, current_node); - if (err) + if (err) { + if (err > 0) + err += nr_pages - i - 1; goto out; + } if (i > start) { err = store_status(status, start, current_node, i - start); if (err) @@ -1678,6 +1692,13 @@ out_flush: /* Make sure we do not overwrite the existing error */ err1 = do_move_pages_to_node(mm, &pagelist, current_node); + /* + * Don't have to report non-attempted pages here since: + * - If the above loop is done gracefully all pages have been + * attempted. + * - If the above loop is aborted it means a fatal error + * happened, should return ret. + */ if (!err1) err1 = store_status(status, start, current_node, i - start); if (err >= 0) diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 7d70e5c78f97..7c1b8f67af7b 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -102,14 +102,14 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ */ static inline void tlb_table_invalidate(struct mmu_gather *tlb) { -#ifndef CONFIG_HAVE_RCU_TABLE_NO_INVALIDATE - /* - * Invalidate page-table caches used by hardware walkers. Then we still - * need to RCU-sched wait while freeing the pages because software - * walkers can still be in-flight. - */ - tlb_flush_mmu_tlbonly(tlb); -#endif + if (tlb_needs_table_invalidate()) { + /* + * Invalidate page-table caches used by hardware walkers. Then + * we still need to RCU-sched wait while freeing the pages + * because software walkers can still be in-flight. + */ + tlb_flush_mmu_tlbonly(tlb); + } } static void tlb_remove_table_smp_sync(void *arg) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 45e39131a716..d387ca74cb5a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6933,7 +6933,8 @@ static u64 zero_pfn_range(unsigned long spfn, unsigned long epfn) * This function also addresses a similar issue where struct pages are left * uninitialized because the physical address range is not covered by * memblock.memory or memblock.reserved. That could happen when memblock - * layout is manually configured via memmap=. + * layout is manually configured via memmap=, or when the highest physical + * address (max_pfn) does not end on a section boundary. */ void __init zero_resv_unavail(void) { @@ -6951,7 +6952,16 @@ void __init zero_resv_unavail(void) pgcnt += zero_pfn_range(PFN_DOWN(next), PFN_UP(start)); next = end; } - pgcnt += zero_pfn_range(PFN_DOWN(next), max_pfn); + + /* + * Early sections always have a fully populated memmap for the whole + * section - see pfn_valid(). If the last section has holes at the + * end and that section is marked "online", the memmap will be + * considered initialized. Make sure that memmap has a well defined + * state. + */ + pgcnt += zero_pfn_range(PFN_DOWN(next), + round_up(max_pfn, PAGES_PER_SECTION)); /* * Struct pages that do not have backing memory. This could be because diff --git a/mm/sparse.c b/mm/sparse.c index 1100fdb9649c..69b41b6046a5 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -787,7 +787,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, ms->usage = NULL; } memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); - ms->section_mem_map = sparse_encode_mem_map(NULL, section_nr); + ms->section_mem_map = (unsigned long)NULL; } if (section_is_early && memmap) diff --git a/net/core/devlink.c b/net/core/devlink.c index ae614965c8c2..61bc67047f56 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3863,6 +3863,12 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, goto out_unlock; } + /* return 0 if there is no further data to read */ + if (start_offset >= region->size) { + err = 0; + goto out_unlock; + } + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, DEVLINK_CMD_REGION_READ); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 536e032d95c8..246a258b1fac 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -1004,8 +1004,10 @@ static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack) { int cpu; - if (!monitor_hw) + if (!monitor_hw) { NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled"); + return; + } monitor_hw = false; diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index ee561297d8a7..fbfd0db182b7 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -27,6 +27,8 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) rcu_read_lock(); /* hsr->node_db, hsr->ports */ port = hsr_port_get_rcu(skb->dev); + if (!port) + goto finish_pass; if (hsr_addr_is_self(port->hsr, eth_hdr(skb)->h_source)) { /* Directly kill frames sent by ourselves */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3640e8563a10..deb466fc3d1f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2618,10 +2618,12 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_cwnd = TCP_INIT_CWND; tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; + tp->delivered = 0; tp->delivered_ce = 0; tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; tcp_clear_retrans(tp); + tp->total_retrans = 0; inet_csk_delack_init(sk); /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 * issue in __tcp_select_window() @@ -2633,10 +2635,14 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_rx_dst = NULL; tcp_saved_syn_free(tp); tp->compressed_ack = 0; + tp->segs_in = 0; + tp->segs_out = 0; tp->bytes_sent = 0; tp->bytes_acked = 0; tp->bytes_received = 0; tp->bytes_retrans = 0; + tp->data_segs_in = 0; + tp->data_segs_out = 0; tp->duplicate_sack[0].start_seq = 0; tp->duplicate_sack[0].end_seq = 0; tp->dsack_dups = 0; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f9b5690e94fd..b11ccb53c7e0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5719,6 +5719,9 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) struct nlattr *tb[IFLA_INET6_MAX + 1]; int err; + if (!idev) + return -EAFNOSUPPORT; + if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) BUG(); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index f82ea12bac37..425b95eb7e87 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -322,8 +322,13 @@ int l2tp_session_register(struct l2tp_session *session, spin_lock_bh(&pn->l2tp_session_hlist_lock); + /* IP encap expects session IDs to be globally unique, while + * UDP encap doesn't. + */ hlist_for_each_entry(session_walk, g_head, global_hlist) - if (session_walk->session_id == session->session_id) { + if (session_walk->session_id == session->session_id && + (session_walk->tunnel->encap == L2TP_ENCAPTYPE_IP || + tunnel->encap == L2TP_ENCAPTYPE_IP)) { err = -EEXIST; goto err_tlock_pnlock; } diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index d8143a8c034d..a9df9dac57b2 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1293,31 +1293,34 @@ ip_set_dump_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static int -dump_init(struct netlink_callback *cb, struct ip_set_net *inst) +ip_set_dump_start(struct netlink_callback *cb) { struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1]; struct nlattr *attr = (void *)nlh + min_len; + struct sk_buff *skb = cb->skb; + struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk)); u32 dump_type; - ip_set_id_t index; int ret; ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, ip_set_dump_policy, NULL); if (ret) - return ret; + goto error; cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]); if (cda[IPSET_ATTR_SETNAME]) { + ip_set_id_t index; struct ip_set *set; set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]), &index); - if (!set) - return -ENOENT; - + if (!set) { + ret = -ENOENT; + goto error; + } dump_type = DUMP_ONE; cb->args[IPSET_CB_INDEX] = index; } else { @@ -1333,10 +1336,17 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) cb->args[IPSET_CB_DUMP] = dump_type; return 0; + +error: + /* We have to create and send the error message manually :-( */ + if (nlh->nlmsg_flags & NLM_F_ACK) { + netlink_ack(cb->skb, nlh, ret, NULL); + } + return ret; } static int -ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) +ip_set_dump_do(struct sk_buff *skb, struct netlink_callback *cb) { ip_set_id_t index = IPSET_INVALID_ID, max; struct ip_set *set = NULL; @@ -1347,18 +1357,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) bool is_destroyed; int ret = 0; - if (!cb->args[IPSET_CB_DUMP]) { - ret = dump_init(cb, inst); - if (ret < 0) { - nlh = nlmsg_hdr(cb->skb); - /* We have to create and send the error message - * manually :-( - */ - if (nlh->nlmsg_flags & NLM_F_ACK) - netlink_ack(cb->skb, nlh, ret, NULL); - return ret; - } - } + if (!cb->args[IPSET_CB_DUMP]) + return -EINVAL; if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max) goto out; @@ -1494,7 +1494,8 @@ static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb, { struct netlink_dump_control c = { - .dump = ip_set_dump_start, + .start = ip_set_dump_start, + .dump = ip_set_dump_do, .done = ip_set_dump_done, }; return netlink_dump_start(ctnl, skb, nlh, &c); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index d72ddb67bb74..4a6ca9723a12 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -194,6 +194,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) service_in_use: write_unlock(&local->services_lock); rxrpc_unuse_local(local); + rxrpc_put_local(local); ret = -EADDRINUSE; error_unlock: release_sock(&rx->sk); @@ -899,6 +900,7 @@ static int rxrpc_release_sock(struct sock *sk) rxrpc_purge_queue(&sk->sk_receive_queue); rxrpc_unuse_local(rx->local); + rxrpc_put_local(rx->local); rx->local = NULL; key_put(rx->key); rx->key = NULL; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 5e99df80e80a..7d730c438404 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -490,6 +490,7 @@ enum rxrpc_call_flag { RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */ RXRPC_CALL_RX_UNDERRUN, /* Got data underrun */ RXRPC_CALL_IS_INTR, /* The call is interruptible */ + RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */ }; /* @@ -1021,6 +1022,16 @@ void rxrpc_unuse_local(struct rxrpc_local *); void rxrpc_queue_local(struct rxrpc_local *); void rxrpc_destroy_all_locals(struct rxrpc_net *); +static inline bool __rxrpc_unuse_local(struct rxrpc_local *local) +{ + return atomic_dec_return(&local->active_users) == 0; +} + +static inline bool __rxrpc_use_local(struct rxrpc_local *local) +{ + return atomic_fetch_add_unless(&local->active_users, 1, 0) != 0; +} + /* * misc.c */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index a31c18c09894..dbdbc4f18b5e 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -493,7 +493,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); - if (conn) + if (conn && !test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) rxrpc_disconnect_call(call); if (call->security) call->security->free_call_crypto(call); @@ -569,6 +569,7 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); struct rxrpc_net *rxnet = call->rxnet; + rxrpc_put_connection(call->conn); rxrpc_put_peer(call->peer); kfree(call->rxtx_buffer); kfree(call->rxtx_annotations); @@ -590,7 +591,6 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); - ASSERTCMP(call->conn, ==, NULL); rxrpc_cleanup_ring(call); rxrpc_free_skb(call->tx_pending, rxrpc_skb_cleaned); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 376370cd9285..ea7d4c21f889 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -785,6 +785,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) u32 cid; spin_lock(&conn->channel_lock); + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); cid = call->cid; if (cid) { @@ -792,7 +793,6 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) chan = &conn->channels[channel]; } trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); - call->conn = NULL; /* Calls that have never actually been assigned a channel can simply be * discarded. If the conn didn't get used either, it will follow @@ -908,7 +908,6 @@ out: spin_unlock(&rxnet->client_conn_cache_lock); out_2: spin_unlock(&conn->channel_lock); - rxrpc_put_connection(conn); _leave(""); return; diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 808a4723f868..06fcff2ebbba 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -438,16 +438,12 @@ again: /* * connection-level event processor */ -void rxrpc_process_connection(struct work_struct *work) +static void rxrpc_do_process_connection(struct rxrpc_connection *conn) { - struct rxrpc_connection *conn = - container_of(work, struct rxrpc_connection, processor); struct sk_buff *skb; u32 abort_code = RX_PROTOCOL_ERROR; int ret; - rxrpc_see_connection(conn); - if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) rxrpc_secure_connection(conn); @@ -475,18 +471,32 @@ void rxrpc_process_connection(struct work_struct *work) } } -out: - rxrpc_put_connection(conn); - _leave(""); return; requeue_and_leave: skb_queue_head(&conn->rx_queue, skb); - goto out; + return; protocol_error: if (rxrpc_abort_connection(conn, ret, abort_code) < 0) goto requeue_and_leave; rxrpc_free_skb(skb, rxrpc_skb_freed); - goto out; + return; +} + +void rxrpc_process_connection(struct work_struct *work) +{ + struct rxrpc_connection *conn = + container_of(work, struct rxrpc_connection, processor); + + rxrpc_see_connection(conn); + + if (__rxrpc_use_local(conn->params.local)) { + rxrpc_do_process_connection(conn); + rxrpc_unuse_local(conn->params.local); + } + + rxrpc_put_connection(conn); + _leave(""); + return; } diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 38d718e90dc6..19e141eeed17 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -223,9 +223,8 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) __rxrpc_disconnect_call(conn, call); spin_unlock(&conn->channel_lock); - call->conn = NULL; + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); conn->idle_timestamp = jiffies; - rxrpc_put_connection(conn); } /* diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 96d54e5bf7bc..ef10fbf71b15 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -599,10 +599,8 @@ ack: false, true, rxrpc_propose_ack_input_data); - if (seq0 == READ_ONCE(call->rx_hard_ack) + 1) { - trace_rxrpc_notify_socket(call->debug_id, serial); - rxrpc_notify_socket(call); - } + trace_rxrpc_notify_socket(call->debug_id, serial); + rxrpc_notify_socket(call); unlock: spin_unlock(&call->input_lock); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 36587260cabd..a6c1349e965d 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -364,11 +364,14 @@ void rxrpc_queue_local(struct rxrpc_local *local) void rxrpc_put_local(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); + unsigned int debug_id; int n; if (local) { + debug_id = local->debug_id; + n = atomic_dec_return(&local->usage); - trace_rxrpc_local(local->debug_id, rxrpc_local_put, n, here); + trace_rxrpc_local(debug_id, rxrpc_local_put, n, here); if (n == 0) call_rcu(&local->rcu, rxrpc_local_rcu); @@ -380,14 +383,11 @@ void rxrpc_put_local(struct rxrpc_local *local) */ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local) { - unsigned int au; - local = rxrpc_get_local_maybe(local); if (!local) return NULL; - au = atomic_fetch_add_unless(&local->active_users, 1, 0); - if (au == 0) { + if (!__rxrpc_use_local(local)) { rxrpc_put_local(local); return NULL; } @@ -401,14 +401,11 @@ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local) */ void rxrpc_unuse_local(struct rxrpc_local *local) { - unsigned int au; - if (local) { - au = atomic_dec_return(&local->active_users); - if (au == 0) + if (__rxrpc_unuse_local(local)) { + rxrpc_get_local(local); rxrpc_queue_local(local); - else - rxrpc_put_local(local); + } } } @@ -465,7 +462,7 @@ static void rxrpc_local_processor(struct work_struct *work) do { again = false; - if (atomic_read(&local->active_users) == 0) { + if (!__rxrpc_use_local(local)) { rxrpc_local_destroyer(local); break; } @@ -479,6 +476,8 @@ static void rxrpc_local_processor(struct work_struct *work) rxrpc_process_local_events(local); again = true; } + + __rxrpc_unuse_local(local); } while (again); rxrpc_put_local(local); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 935bb60fff56..bad3d2420344 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -129,7 +129,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, rxrpc_serial_t *_serial) { - struct rxrpc_connection *conn = NULL; + struct rxrpc_connection *conn; struct rxrpc_ack_buffer *pkt; struct msghdr msg; struct kvec iov[2]; @@ -139,18 +139,14 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, int ret; u8 reason; - spin_lock_bh(&call->lock); - if (call->conn) - conn = rxrpc_get_connection_maybe(call->conn); - spin_unlock_bh(&call->lock); - if (!conn) + if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) return -ECONNRESET; pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) { - rxrpc_put_connection(conn); + if (!pkt) return -ENOMEM; - } + + conn = call->conn; msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; @@ -244,7 +240,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, } out: - rxrpc_put_connection(conn); kfree(pkt); return ret; } @@ -254,7 +249,7 @@ out: */ int rxrpc_send_abort_packet(struct rxrpc_call *call) { - struct rxrpc_connection *conn = NULL; + struct rxrpc_connection *conn; struct rxrpc_abort_buffer pkt; struct msghdr msg; struct kvec iov[1]; @@ -271,13 +266,11 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) test_bit(RXRPC_CALL_TX_LAST, &call->flags)) return 0; - spin_lock_bh(&call->lock); - if (call->conn) - conn = rxrpc_get_connection_maybe(call->conn); - spin_unlock_bh(&call->lock); - if (!conn) + if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) return -ECONNRESET; + conn = call->conn; + msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; msg.msg_control = NULL; @@ -312,8 +305,6 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr, rxrpc_tx_point_call_abort); rxrpc_tx_backoff(call, ret); - - rxrpc_put_connection(conn); return ret; } diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 48f67a9b1037..923b263c401b 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -364,27 +364,31 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, if (!rxrpc_get_peer_maybe(peer)) continue; - spin_unlock_bh(&rxnet->peer_hash_lock); - - keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; - slot = keepalive_at - base; - _debug("%02x peer %u t=%d {%pISp}", - cursor, peer->debug_id, slot, &peer->srx.transport); + if (__rxrpc_use_local(peer->local)) { + spin_unlock_bh(&rxnet->peer_hash_lock); + + keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; + slot = keepalive_at - base; + _debug("%02x peer %u t=%d {%pISp}", + cursor, peer->debug_id, slot, &peer->srx.transport); + + if (keepalive_at <= base || + keepalive_at > base + RXRPC_KEEPALIVE_TIME) { + rxrpc_send_keepalive(peer); + slot = RXRPC_KEEPALIVE_TIME; + } - if (keepalive_at <= base || - keepalive_at > base + RXRPC_KEEPALIVE_TIME) { - rxrpc_send_keepalive(peer); - slot = RXRPC_KEEPALIVE_TIME; + /* A transmission to this peer occurred since last we + * examined it so put it into the appropriate future + * bucket. + */ + slot += cursor; + slot &= mask; + spin_lock_bh(&rxnet->peer_hash_lock); + list_add_tail(&peer->keepalive_link, + &rxnet->peer_keepalive[slot & mask]); + rxrpc_unuse_local(peer->local); } - - /* A transmission to this peer occurred since last we examined - * it so put it into the appropriate future bucket. - */ - slot += cursor; - slot &= mask; - spin_lock_bh(&rxnet->peer_hash_lock); - list_add_tail(&peer->keepalive_link, - &rxnet->peer_keepalive[slot & mask]); rxrpc_put_peer_locked(peer); } diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index c22624131949..d36949d9382c 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -463,10 +463,8 @@ static u32 gen_tunnel(struct rsvp_head *data) static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { [TCA_RSVP_CLASSID] = { .type = NLA_U32 }, - [TCA_RSVP_DST] = { .type = NLA_BINARY, - .len = RSVP_DST_LEN * sizeof(u32) }, - [TCA_RSVP_SRC] = { .type = NLA_BINARY, - .len = RSVP_DST_LEN * sizeof(u32) }, + [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) }, + [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) }, [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) }, }; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 3d4a1280352f..09b7dc5fe7e0 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -333,12 +333,31 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, cp->fall_through = p->fall_through; cp->tp = tp; + if (tb[TCA_TCINDEX_HASH]) + cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); + + if (tb[TCA_TCINDEX_MASK]) + cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); + + if (tb[TCA_TCINDEX_SHIFT]) + cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); + + if (!cp->hash) { + /* Hash not specified, use perfect hash if the upper limit + * of the hashing index is below the threshold. + */ + if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) + cp->hash = (cp->mask >> cp->shift) + 1; + else + cp->hash = DEFAULT_HASH_SIZE; + } + if (p->perfect) { int i; if (tcindex_alloc_perfect_hash(net, cp) < 0) goto errout; - for (i = 0; i < cp->hash; i++) + for (i = 0; i < min(cp->hash, p->hash); i++) cp->perfect[i].res = p->perfect[i].res; balloc = 1; } @@ -346,19 +365,10 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = tcindex_filter_result_init(&new_filter_result, net); if (err < 0) - goto errout1; + goto errout_alloc; if (old_r) cr = r->res; - if (tb[TCA_TCINDEX_HASH]) - cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); - - if (tb[TCA_TCINDEX_MASK]) - cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); - - if (tb[TCA_TCINDEX_SHIFT]) - cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); - err = -EBUSY; /* Hash already allocated, make sure that we still meet the @@ -376,16 +386,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (tb[TCA_TCINDEX_FALL_THROUGH]) cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]); - if (!cp->hash) { - /* Hash not specified, use perfect hash if the upper limit - * of the hashing index is below the threshold. - */ - if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) - cp->hash = (cp->mask >> cp->shift) + 1; - else - cp->hash = DEFAULT_HASH_SIZE; - } - if (!cp->perfect && !cp->h) cp->alloc_hash = cp->hash; @@ -484,7 +484,6 @@ errout_alloc: tcindex_free_perfect_hash(cp); else if (balloc == 2) kfree(cp->h); -errout1: tcf_exts_destroy(&new_filter_result.exts); errout: kfree(cp); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index c609373c8661..660fc45ee40f 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -31,6 +31,7 @@ static DEFINE_SPINLOCK(taprio_list_lock); #define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) #define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD) +#define TAPRIO_FLAGS_INVALID U32_MAX struct sched_entry { struct list_head list; @@ -766,6 +767,7 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 }, [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 }, [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 }, + [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 }, }; static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, @@ -1367,6 +1369,33 @@ static int taprio_mqprio_cmp(const struct net_device *dev, return 0; } +/* The semantics of the 'flags' argument in relation to 'change()' + * requests, are interpreted following two rules (which are applied in + * this order): (1) an omitted 'flags' argument is interpreted as + * zero; (2) the 'flags' of a "running" taprio instance cannot be + * changed. + */ +static int taprio_new_flags(const struct nlattr *attr, u32 old, + struct netlink_ext_ack *extack) +{ + u32 new = 0; + + if (attr) + new = nla_get_u32(attr); + + if (old != TAPRIO_FLAGS_INVALID && old != new) { + NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported"); + return -EOPNOTSUPP; + } + + if (!taprio_flags_valid(new)) { + NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid"); + return -EINVAL; + } + + return new; +} + static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -1375,7 +1404,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct tc_mqprio_qopt *mqprio = NULL; - u32 taprio_flags = 0; unsigned long flags; ktime_t start; int i, err; @@ -1388,21 +1416,14 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_TAPRIO_ATTR_PRIOMAP]) mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]); - if (tb[TCA_TAPRIO_ATTR_FLAGS]) { - taprio_flags = nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]); - - if (q->flags != 0 && q->flags != taprio_flags) { - NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported"); - return -EOPNOTSUPP; - } else if (!taprio_flags_valid(taprio_flags)) { - NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid"); - return -EINVAL; - } + err = taprio_new_flags(tb[TCA_TAPRIO_ATTR_FLAGS], + q->flags, extack); + if (err < 0) + return err; - q->flags = taprio_flags; - } + q->flags = err; - err = taprio_parse_mqprio_opt(dev, mqprio, extack, taprio_flags); + err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags); if (err < 0) return err; @@ -1444,7 +1465,20 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, taprio_set_picos_per_byte(dev, q); - if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) + if (mqprio) { + netdev_set_num_tc(dev, mqprio->num_tc); + for (i = 0; i < mqprio->num_tc; i++) + netdev_set_tc_queue(dev, i, + mqprio->count[i], + mqprio->offset[i]); + + /* Always use supplied priority mappings */ + for (i = 0; i <= TC_BITMASK; i++) + netdev_set_prio_tc_map(dev, i, + mqprio->prio_tc_map[i]); + } + + if (FULL_OFFLOAD_IS_ENABLED(q->flags)) err = taprio_enable_offload(dev, mqprio, q, new_admin, extack); else err = taprio_disable_offload(dev, q, extack); @@ -1464,27 +1498,14 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]); } - if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) && - !FULL_OFFLOAD_IS_ENABLED(taprio_flags) && + if (!TXTIME_ASSIST_IS_ENABLED(q->flags) && + !FULL_OFFLOAD_IS_ENABLED(q->flags) && !hrtimer_active(&q->advance_timer)) { hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); q->advance_timer.function = advance_sched; } - if (mqprio) { - netdev_set_num_tc(dev, mqprio->num_tc); - for (i = 0; i < mqprio->num_tc; i++) - netdev_set_tc_queue(dev, i, - mqprio->count[i], - mqprio->offset[i]); - - /* Always use supplied priority mappings */ - for (i = 0; i <= TC_BITMASK; i++) - netdev_set_prio_tc_map(dev, i, - mqprio->prio_tc_map[i]); - } - - if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) { + if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { q->dequeue = taprio_dequeue_offload; q->peek = taprio_peek_offload; } else { @@ -1501,9 +1522,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, goto unlock; } - if (TXTIME_ASSIST_IS_ENABLED(taprio_flags)) { - setup_txtime(q, new_admin, start); + setup_txtime(q, new_admin, start); + if (TXTIME_ASSIST_IS_ENABLED(q->flags)) { if (!oper) { rcu_assign_pointer(q->oper_sched, new_admin); err = 0; @@ -1528,7 +1549,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, spin_unlock_irqrestore(&q->current_entry_lock, flags); - if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) + if (FULL_OFFLOAD_IS_ENABLED(q->flags)) taprio_offload_config_changed(q); } @@ -1567,7 +1588,7 @@ static void taprio_destroy(struct Qdisc *sch) } q->qdiscs = NULL; - netdev_set_num_tc(dev, 0); + netdev_reset_tc(dev); if (q->oper_sched) call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb); @@ -1597,6 +1618,7 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, * and get the valid one on taprio_change(). */ q->clockid = -1; + q->flags = TAPRIO_FLAGS_INVALID; spin_lock(&taprio_list_lock); list_add(&q->taprio_list, &taprio_list); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 908b60a72d95..ed20fa8a6f70 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1245,6 +1245,7 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, dprintk("RPC: No creds found!\n"); goto out; } else { + struct timespec64 boot; /* steal creds */ rsci.cred = ud->creds; @@ -1265,6 +1266,9 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, &expiry, GFP_KERNEL); if (status) goto out; + + getboottime64(&boot); + expiry -= boot.tv_sec; } rsci.h.expiry_time = expiry; diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 42b571cde177..e7ad48c605e0 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -236,7 +236,7 @@ all: clean: $(MAKE) -C ../../ M=$(CURDIR) clean - @rm -f *~ + @find $(CURDIR) -type f -name '*~' -delete $(LIBBPF): FORCE # Fix up variables inherited from Kbuild that tools/ build system won't like diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 0da6e9e7132e..8b862a7a6c6a 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -16,6 +16,10 @@ static const char *__doc__ = #include #include #include +#include + +#define __must_check +#include #include #include @@ -46,6 +50,10 @@ static int cpus_count_map_fd; static int cpus_iterator_map_fd; static int exception_cnt_map_fd; +#define NUM_TP 5 +struct bpf_link *tp_links[NUM_TP] = { 0 }; +static int tp_cnt = 0; + /* Exit return codes */ #define EXIT_OK 0 #define EXIT_FAIL 1 @@ -88,6 +96,10 @@ static void int_exit(int sig) printf("program on interface changed, not removing\n"); } } + /* Detach tracepoints */ + while (tp_cnt) + bpf_link__destroy(tp_links[--tp_cnt]); + exit(EXIT_OK); } @@ -588,23 +600,61 @@ static void stats_poll(int interval, bool use_separators, char *prog_name, free_stats_record(prev); } +static struct bpf_link * attach_tp(struct bpf_object *obj, + const char *tp_category, + const char* tp_name) +{ + struct bpf_program *prog; + struct bpf_link *link; + char sec_name[PATH_MAX]; + int len; + + len = snprintf(sec_name, PATH_MAX, "tracepoint/%s/%s", + tp_category, tp_name); + if (len < 0) + exit(EXIT_FAIL); + + prog = bpf_object__find_program_by_title(obj, sec_name); + if (!prog) { + fprintf(stderr, "ERR: finding progsec: %s\n", sec_name); + exit(EXIT_FAIL_BPF); + } + + link = bpf_program__attach_tracepoint(prog, tp_category, tp_name); + if (IS_ERR(link)) + exit(EXIT_FAIL_BPF); + + return link; +} + +static void init_tracepoints(struct bpf_object *obj) { + tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_err"); + tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_map_err"); + tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_exception"); + tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_enqueue"); + tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_kthread"); +} + static int init_map_fds(struct bpf_object *obj) { - cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map"); - rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt"); + /* Maps updated by tracepoints */ redirect_err_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt"); + exception_cnt_map_fd = + bpf_object__find_map_fd_by_name(obj, "exception_cnt"); cpumap_enqueue_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt"); cpumap_kthread_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt"); + + /* Maps used by XDP */ + rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt"); + cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map"); cpus_available_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_available"); cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count"); cpus_iterator_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_iterator"); - exception_cnt_map_fd = - bpf_object__find_map_fd_by_name(obj, "exception_cnt"); if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 || redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 || @@ -662,6 +712,7 @@ int main(int argc, char **argv) strerror(errno)); return EXIT_FAIL; } + init_tracepoints(obj); if (init_map_fds(obj) < 0) { fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n"); return EXIT_FAIL; diff --git a/scripts/find-unused-docs.sh b/scripts/find-unused-docs.sh index 3f46f8977dc4..ee6a50e33aba 100755 --- a/scripts/find-unused-docs.sh +++ b/scripts/find-unused-docs.sh @@ -54,7 +54,7 @@ for file in `find $1 -name '*.c'`; do if [[ ${FILES_INCLUDED[$file]+_} ]]; then continue; fi - str=$(scripts/kernel-doc -text -export "$file" 2>/dev/null) + str=$(scripts/kernel-doc -export "$file" 2>/dev/null) if [[ -n "$str" ]]; then echo "$file" fi diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index abeb09c30633..ad22066eba04 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2832,42 +2832,39 @@ static int smack_socket_connect(struct socket *sock, struct sockaddr *sap, int addrlen) { int rc = 0; -#if IS_ENABLED(CONFIG_IPV6) - struct sockaddr_in6 *sip = (struct sockaddr_in6 *)sap; -#endif -#ifdef SMACK_IPV6_SECMARK_LABELING - struct smack_known *rsp; - struct socket_smack *ssp; -#endif if (sock->sk == NULL) return 0; - + if (sock->sk->sk_family != PF_INET && + (!IS_ENABLED(CONFIG_IPV6) || sock->sk->sk_family != PF_INET6)) + return 0; + if (addrlen < offsetofend(struct sockaddr, sa_family)) + return 0; + if (IS_ENABLED(CONFIG_IPV6) && sap->sa_family == AF_INET6) { + struct sockaddr_in6 *sip = (struct sockaddr_in6 *)sap; #ifdef SMACK_IPV6_SECMARK_LABELING - ssp = sock->sk->sk_security; + struct smack_known *rsp; #endif - switch (sock->sk->sk_family) { - case PF_INET: - if (addrlen < sizeof(struct sockaddr_in) || - sap->sa_family != AF_INET) - return -EINVAL; - rc = smack_netlabel_send(sock->sk, (struct sockaddr_in *)sap); - break; - case PF_INET6: - if (addrlen < SIN6_LEN_RFC2133 || sap->sa_family != AF_INET6) - return -EINVAL; + if (addrlen < SIN6_LEN_RFC2133) + return 0; #ifdef SMACK_IPV6_SECMARK_LABELING rsp = smack_ipv6host_label(sip); - if (rsp != NULL) + if (rsp != NULL) { + struct socket_smack *ssp = sock->sk->sk_security; + rc = smk_ipv6_check(ssp->smk_out, rsp, sip, - SMK_CONNECTING); + SMK_CONNECTING); + } #endif #ifdef SMACK_IPV6_PORT_LABELING rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING); #endif - break; + return rc; } + if (sap->sa_family != AF_INET || addrlen < sizeof(struct sockaddr_in)) + return 0; + rc = smack_netlabel_send(sock->sk, (struct sockaddr_in *)sap); return rc; } diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index aee7c04d49e5..b61ba0321a72 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -915,7 +915,7 @@ static void print_formats(struct snd_dummy *dummy, { int i; - for (i = 0; i < SNDRV_PCM_FORMAT_LAST; i++) { + for (i = 0; i <= SNDRV_PCM_FORMAT_LAST; i++) { if (dummy->pcm_hw.formats & (1ULL << i)) snd_iprintf(buffer, " %s", snd_pcm_format_name(i)); } diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index f6cbb831b86a..85beb172d810 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2156,6 +2156,8 @@ static struct snd_pci_quirk power_save_blacklist[] = { /* https://bugzilla.redhat.com/show_bug.cgi?id=1581607 */ SND_PCI_QUIRK(0x1558, 0x3501, "Clevo W35xSS_370SS", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ + SND_PCI_QUIRK(0x1558, 0x6504, "Clevo W65_67SB", 0), + /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ SND_PCI_QUIRK(0x1028, 0x0497, "Dell Precision T3600", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ /* Note the P55A-UD3 and Z87-D3HP share the subsys id for the HDA dev */ @@ -2415,6 +2417,8 @@ static const struct pci_device_id azx_ids[] = { /* Jasperlake */ { PCI_DEVICE(0x8086, 0x38c8), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + { PCI_DEVICE(0x8086, 0x4dc8), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, /* Tigerlake */ { PCI_DEVICE(0x8086, 0xa0c8), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c index 8350954b7986..e5191584638a 100644 --- a/sound/pci/hda/hda_tegra.c +++ b/sound/pci/hda/hda_tegra.c @@ -398,6 +398,7 @@ static int hda_tegra_create(struct snd_card *card, return err; chip->bus.needs_damn_long_delay = 1; + chip->bus.core.aligned_mmio = 1; err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops); if (err < 0) { diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 488c17c9f375..8ac805a634f4 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -4153,6 +4153,7 @@ HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI", patch_i915_glk_hdmi), HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi), HDA_CODEC_ENTRY(0x8086280f, "Icelake HDMI", patch_i915_icl_hdmi), HDA_CODEC_ENTRY(0x80862812, "Tigerlake HDMI", patch_i915_tgl_hdmi), +HDA_CODEC_ENTRY(0x8086281a, "Jasperlake HDMI", patch_i915_icl_hdmi), HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi), HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI", patch_i915_byt_hdmi), HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI", patch_i915_byt_hdmi), diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index aa1f9637d895..e949b372cead 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1344,7 +1344,8 @@ static int sgtl5000_set_power_regs(struct snd_soc_component *component) * if vddio == vdda the source of charge pump should be * assigned manually to VDDIO */ - if (vddio == vdda) { + if (regulator_is_equal(sgtl5000->supplies[VDDA].consumer, + sgtl5000->supplies[VDDIO].consumer)) { lreg_ctrl |= SGTL5000_VDDC_ASSN_OVRD; lreg_ctrl |= SGTL5000_VDDC_MAN_ASSN_VDDIO << SGTL5000_VDDC_MAN_ASSN_SHIFT; diff --git a/sound/soc/intel/boards/skl_hda_dsp_common.c b/sound/soc/intel/boards/skl_hda_dsp_common.c index 58409b6e476e..e3d405e57c5f 100644 --- a/sound/soc/intel/boards/skl_hda_dsp_common.c +++ b/sound/soc/intel/boards/skl_hda_dsp_common.c @@ -38,16 +38,19 @@ int skl_hda_hdmi_add_pcm(struct snd_soc_card *card, int device) return 0; } -SND_SOC_DAILINK_DEFS(idisp1, - DAILINK_COMP_ARRAY(COMP_CPU("iDisp1 Pin")), +SND_SOC_DAILINK_DEF(idisp1_cpu, + DAILINK_COMP_ARRAY(COMP_CPU("iDisp1 Pin"))); +SND_SOC_DAILINK_DEF(idisp1_codec, DAILINK_COMP_ARRAY(COMP_CODEC("ehdaudio0D2", "intel-hdmi-hifi1"))); -SND_SOC_DAILINK_DEFS(idisp2, - DAILINK_COMP_ARRAY(COMP_CPU("iDisp2 Pin")), +SND_SOC_DAILINK_DEF(idisp2_cpu, + DAILINK_COMP_ARRAY(COMP_CPU("iDisp2 Pin"))); +SND_SOC_DAILINK_DEF(idisp2_codec, DAILINK_COMP_ARRAY(COMP_CODEC("ehdaudio0D2", "intel-hdmi-hifi2"))); -SND_SOC_DAILINK_DEFS(idisp3, - DAILINK_COMP_ARRAY(COMP_CPU("iDisp3 Pin")), +SND_SOC_DAILINK_DEF(idisp3_cpu, + DAILINK_COMP_ARRAY(COMP_CPU("iDisp3 Pin"))); +SND_SOC_DAILINK_DEF(idisp3_codec, DAILINK_COMP_ARRAY(COMP_CODEC("ehdaudio0D2", "intel-hdmi-hifi3"))); SND_SOC_DAILINK_DEF(analog_cpu, @@ -80,21 +83,21 @@ struct snd_soc_dai_link skl_hda_be_dai_links[HDA_DSP_MAX_BE_DAI_LINKS] = { .id = 1, .dpcm_playback = 1, .no_pcm = 1, - SND_SOC_DAILINK_REG(idisp1), + SND_SOC_DAILINK_REG(idisp1_cpu, idisp1_codec, platform), }, { .name = "iDisp2", .id = 2, .dpcm_playback = 1, .no_pcm = 1, - SND_SOC_DAILINK_REG(idisp2), + SND_SOC_DAILINK_REG(idisp2_cpu, idisp2_codec, platform), }, { .name = "iDisp3", .id = 3, .dpcm_playback = 1, .no_pcm = 1, - SND_SOC_DAILINK_REG(idisp3), + SND_SOC_DAILINK_REG(idisp3_cpu, idisp3_codec, platform), }, { .name = "Analog Playback and Capture", diff --git a/sound/soc/meson/axg-fifo.c b/sound/soc/meson/axg-fifo.c index 5a3749938900..d286dff3171d 100644 --- a/sound/soc/meson/axg-fifo.c +++ b/sound/soc/meson/axg-fifo.c @@ -108,10 +108,12 @@ static int axg_fifo_pcm_hw_params(struct snd_pcm_substream *ss, { struct snd_pcm_runtime *runtime = ss->runtime; struct axg_fifo *fifo = axg_fifo_data(ss); + unsigned int burst_num, period, threshold; dma_addr_t end_ptr; - unsigned int burst_num; int ret; + period = params_period_bytes(params); + ret = snd_pcm_lib_malloc_pages(ss, params_buffer_bytes(params)); if (ret < 0) return ret; @@ -122,9 +124,25 @@ static int axg_fifo_pcm_hw_params(struct snd_pcm_substream *ss, regmap_write(fifo->map, FIFO_FINISH_ADDR, end_ptr); /* Setup interrupt periodicity */ - burst_num = params_period_bytes(params) / AXG_FIFO_BURST; + burst_num = period / AXG_FIFO_BURST; regmap_write(fifo->map, FIFO_INT_ADDR, burst_num); + /* + * Start the fifo request on the smallest of the following: + * - Half the fifo size + * - Half the period size + */ + threshold = min(period / 2, + (unsigned int)AXG_FIFO_MIN_DEPTH / 2); + + /* + * With the threshold in bytes, register value is: + * V = (threshold / burst) - 1 + */ + threshold /= AXG_FIFO_BURST; + regmap_field_write(fifo->field_threshold, + threshold ? threshold - 1 : 0); + /* Enable block count irq */ regmap_update_bits(fifo->map, FIFO_CTRL0, CTRL0_INT_EN(FIFO_INT_COUNT_REPEAT), @@ -360,6 +378,11 @@ int axg_fifo_probe(struct platform_device *pdev) return fifo->irq; } + fifo->field_threshold = + devm_regmap_field_alloc(dev, fifo->map, data->field_threshold); + if (IS_ERR(fifo->field_threshold)) + return PTR_ERR(fifo->field_threshold); + return devm_snd_soc_register_component(dev, data->component_drv, data->dai_drv, 1); } diff --git a/sound/soc/meson/axg-fifo.h b/sound/soc/meson/axg-fifo.h index bb1e2ce50256..ab546a3cf940 100644 --- a/sound/soc/meson/axg-fifo.h +++ b/sound/soc/meson/axg-fifo.h @@ -9,7 +9,9 @@ struct clk; struct platform_device; +struct reg_field; struct regmap; +struct regmap_field; struct reset_control; struct snd_soc_component_driver; @@ -50,8 +52,6 @@ struct snd_soc_pcm_runtime; #define CTRL1_STATUS2_SEL_MASK GENMASK(11, 8) #define CTRL1_STATUS2_SEL(x) ((x) << 8) #define STATUS2_SEL_DDR_READ 0 -#define CTRL1_THRESHOLD_MASK GENMASK(23, 16) -#define CTRL1_THRESHOLD(x) ((x) << 16) #define CTRL1_FRDDR_DEPTH_MASK GENMASK(31, 24) #define CTRL1_FRDDR_DEPTH(x) ((x) << 24) #define FIFO_START_ADDR 0x08 @@ -67,12 +67,14 @@ struct axg_fifo { struct regmap *map; struct clk *pclk; struct reset_control *arb; + struct regmap_field *field_threshold; int irq; }; struct axg_fifo_match_data { const struct snd_soc_component_driver *component_drv; struct snd_soc_dai_driver *dai_drv; + struct reg_field field_threshold; }; extern const struct snd_pcm_ops axg_fifo_pcm_ops; diff --git a/sound/soc/meson/axg-frddr.c b/sound/soc/meson/axg-frddr.c index 6ab111c31b28..09773a9ae964 100644 --- a/sound/soc/meson/axg-frddr.c +++ b/sound/soc/meson/axg-frddr.c @@ -50,7 +50,7 @@ static int axg_frddr_dai_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct axg_fifo *fifo = snd_soc_dai_get_drvdata(dai); - unsigned int fifo_depth, fifo_threshold; + unsigned int fifo_depth; int ret; /* Enable pclk to access registers and clock the fifo ip */ @@ -68,11 +68,8 @@ static int axg_frddr_dai_startup(struct snd_pcm_substream *substream, * Depth and threshold are zero based. */ fifo_depth = AXG_FIFO_MIN_CNT - 1; - fifo_threshold = (AXG_FIFO_MIN_CNT / 2) - 1; - regmap_update_bits(fifo->map, FIFO_CTRL1, - CTRL1_FRDDR_DEPTH_MASK | CTRL1_THRESHOLD_MASK, - CTRL1_FRDDR_DEPTH(fifo_depth) | - CTRL1_THRESHOLD(fifo_threshold)); + regmap_update_bits(fifo->map, FIFO_CTRL1, CTRL1_FRDDR_DEPTH_MASK, + CTRL1_FRDDR_DEPTH(fifo_depth)); return 0; } @@ -153,8 +150,9 @@ static const struct snd_soc_component_driver axg_frddr_component_drv = { }; static const struct axg_fifo_match_data axg_frddr_match_data = { - .component_drv = &axg_frddr_component_drv, - .dai_drv = &axg_frddr_dai_drv + .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23), + .component_drv = &axg_frddr_component_drv, + .dai_drv = &axg_frddr_dai_drv }; static const struct snd_soc_dai_ops g12a_frddr_ops = { @@ -271,8 +269,9 @@ static const struct snd_soc_component_driver g12a_frddr_component_drv = { }; static const struct axg_fifo_match_data g12a_frddr_match_data = { - .component_drv = &g12a_frddr_component_drv, - .dai_drv = &g12a_frddr_dai_drv + .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23), + .component_drv = &g12a_frddr_component_drv, + .dai_drv = &g12a_frddr_dai_drv }; /* On SM1, the output selection in on CTRL2 */ @@ -335,8 +334,9 @@ static const struct snd_soc_component_driver sm1_frddr_component_drv = { }; static const struct axg_fifo_match_data sm1_frddr_match_data = { - .component_drv = &sm1_frddr_component_drv, - .dai_drv = &g12a_frddr_dai_drv + .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23), + .component_drv = &sm1_frddr_component_drv, + .dai_drv = &g12a_frddr_dai_drv }; static const struct of_device_id axg_frddr_of_match[] = { diff --git a/sound/soc/meson/axg-toddr.c b/sound/soc/meson/axg-toddr.c index c8ea2145f576..ecf41c7549a6 100644 --- a/sound/soc/meson/axg-toddr.c +++ b/sound/soc/meson/axg-toddr.c @@ -89,7 +89,6 @@ static int axg_toddr_dai_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct axg_fifo *fifo = snd_soc_dai_get_drvdata(dai); - unsigned int fifo_threshold; int ret; /* Enable pclk to access registers and clock the fifo ip */ @@ -107,11 +106,6 @@ static int axg_toddr_dai_startup(struct snd_pcm_substream *substream, /* Apply single buffer mode to the interface */ regmap_update_bits(fifo->map, FIFO_CTRL0, CTRL0_TODDR_PP_MODE, 0); - /* TODDR does not have a configurable fifo depth */ - fifo_threshold = AXG_FIFO_MIN_CNT - 1; - regmap_update_bits(fifo->map, FIFO_CTRL1, CTRL1_THRESHOLD_MASK, - CTRL1_THRESHOLD(fifo_threshold)); - return 0; } @@ -185,8 +179,9 @@ static const struct snd_soc_component_driver axg_toddr_component_drv = { }; static const struct axg_fifo_match_data axg_toddr_match_data = { - .component_drv = &axg_toddr_component_drv, - .dai_drv = &axg_toddr_dai_drv + .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23), + .component_drv = &axg_toddr_component_drv, + .dai_drv = &axg_toddr_dai_drv }; static const struct snd_soc_dai_ops g12a_toddr_ops = { @@ -218,8 +213,9 @@ static const struct snd_soc_component_driver g12a_toddr_component_drv = { }; static const struct axg_fifo_match_data g12a_toddr_match_data = { - .component_drv = &g12a_toddr_component_drv, - .dai_drv = &g12a_toddr_dai_drv + .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23), + .component_drv = &g12a_toddr_component_drv, + .dai_drv = &g12a_toddr_dai_drv }; static const char * const sm1_toddr_sel_texts[] = { @@ -282,8 +278,9 @@ static const struct snd_soc_component_driver sm1_toddr_component_drv = { }; static const struct axg_fifo_match_data sm1_toddr_match_data = { - .component_drv = &sm1_toddr_component_drv, - .dai_drv = &g12a_toddr_dai_drv + .field_threshold = REG_FIELD(FIFO_CTRL1, 12, 23), + .component_drv = &sm1_toddr_component_drv, + .dai_drv = &g12a_toddr_dai_drv }; static const struct of_device_id axg_toddr_of_match[] = { diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c index 81f28f7ff1a0..12aec140819a 100644 --- a/sound/soc/sof/core.c +++ b/sound/soc/sof/core.c @@ -288,6 +288,46 @@ static int sof_machine_check(struct snd_sof_dev *sdev) #endif } +/* + * FW Boot State Transition Diagram + * + * +-----------------------------------------------------------------------+ + * | | + * ------------------ ------------------ | + * | | | | | + * | BOOT_FAILED | | READY_FAILED |-------------------------+ | + * | | | | | | + * ------------------ ------------------ | | + * ^ ^ | | + * | | | | + * (FW Boot Timeout) (FW_READY FAIL) | | + * | | | | + * | | | | + * ------------------ | ------------------ | | + * | | | | | | | + * | IN_PROGRESS |---------------+------------->| COMPLETE | | | + * | | (FW Boot OK) (FW_READY OK) | | | | + * ------------------ ------------------ | | + * ^ | | | + * | | | | + * (FW Loading OK) (System Suspend/Runtime Suspend) + * | | | | + * | | | | + * ------------------ ------------------ | | | + * | | | |<-----+ | | + * | PREPARE | | NOT_STARTED |<---------------------+ | + * | | | |<---------------------------+ + * ------------------ ------------------ + * | ^ | ^ + * | | | | + * | +-----------------------+ | + * | (DSP Probe OK) | + * | | + * | | + * +------------------------------------+ + * (System Suspend/Runtime Suspend) + */ + static int sof_probe_continue(struct snd_sof_dev *sdev) { struct snd_sof_pdata *plat_data = sdev->pdata; @@ -303,6 +343,8 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) return ret; } + sdev->fw_state = SOF_FW_BOOT_PREPARE; + /* check machine info */ ret = sof_machine_check(sdev); if (ret < 0) { @@ -342,7 +384,12 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) goto fw_load_err; } - /* boot the firmware */ + sdev->fw_state = SOF_FW_BOOT_IN_PROGRESS; + + /* + * Boot the firmware. The FW boot status will be modified + * in snd_sof_run_firmware() depending on the outcome. + */ ret = snd_sof_run_firmware(sdev); if (ret < 0) { dev_err(sdev->dev, "error: failed to boot DSP firmware %d\n", @@ -368,7 +415,7 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) if (ret < 0) { dev_err(sdev->dev, "error: failed to register DSP DAI driver %d\n", ret); - goto fw_run_err; + goto fw_trace_err; } drv_name = plat_data->machine->drv_name; @@ -382,7 +429,7 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) if (IS_ERR(plat_data->pdev_mach)) { ret = PTR_ERR(plat_data->pdev_mach); - goto fw_run_err; + goto fw_trace_err; } dev_dbg(sdev->dev, "created machine %s\n", @@ -393,7 +440,8 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) return 0; -#if !IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE) +fw_trace_err: + snd_sof_free_trace(sdev); fw_run_err: snd_sof_fw_unload(sdev); fw_load_err: @@ -402,21 +450,10 @@ ipc_err: snd_sof_free_debug(sdev); dbg_err: snd_sof_remove(sdev); -#else - - /* - * when the probe_continue is handled in a work queue, the - * probe does not fail so we don't release resources here. - * They will be released with an explicit call to - * snd_sof_device_remove() when the PCI/ACPI device is removed - */ -fw_run_err: -fw_load_err: -ipc_err: -dbg_err: - -#endif + /* all resources freed, update state to match */ + sdev->fw_state = SOF_FW_BOOT_NOT_STARTED; + sdev->first_boot = true; return ret; } @@ -447,6 +484,7 @@ int snd_sof_device_probe(struct device *dev, struct snd_sof_pdata *plat_data) sdev->pdata = plat_data; sdev->first_boot = true; + sdev->fw_state = SOF_FW_BOOT_NOT_STARTED; dev_set_drvdata(dev, sdev); /* check all mandatory ops */ @@ -494,10 +532,12 @@ int snd_sof_device_remove(struct device *dev) if (IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE)) cancel_work_sync(&sdev->probe_work); - snd_sof_fw_unload(sdev); - snd_sof_ipc_free(sdev); - snd_sof_free_debug(sdev); - snd_sof_free_trace(sdev); + if (sdev->fw_state > SOF_FW_BOOT_NOT_STARTED) { + snd_sof_fw_unload(sdev); + snd_sof_ipc_free(sdev); + snd_sof_free_debug(sdev); + snd_sof_free_trace(sdev); + } /* * Unregister machine driver. This will unbind the snd_card which @@ -513,7 +553,8 @@ int snd_sof_device_remove(struct device *dev) * scheduled on, when they are unloaded. Therefore, the DSP must be * removed only after the topology has been unloaded. */ - snd_sof_remove(sdev); + if (sdev->fw_state > SOF_FW_BOOT_NOT_STARTED) + snd_sof_remove(sdev); /* release firmware */ release_firmware(pdata->fw); diff --git a/sound/soc/sof/intel/hda-loader.c b/sound/soc/sof/intel/hda-loader.c index 65c2af3fcaab..356bb134ae93 100644 --- a/sound/soc/sof/intel/hda-loader.c +++ b/sound/soc/sof/intel/hda-loader.c @@ -278,7 +278,6 @@ int hda_dsp_cl_boot_firmware(struct snd_sof_dev *sdev) /* init for booting wait */ init_waitqueue_head(&sdev->boot_wait); - sdev->boot_complete = false; /* prepare DMA for code loader stream */ tag = cl_stream_prepare(sdev, 0x40, stripped_firmware.size, diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index 5a5163eef2ef..3c4b604412f0 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -166,7 +166,7 @@ void hda_dsp_dump_skl(struct snd_sof_dev *sdev, u32 flags) panic = snd_sof_dsp_read(sdev, HDA_DSP_BAR, HDA_ADSP_ERROR_CODE_SKL + 0x4); - if (sdev->boot_complete) { + if (sdev->fw_state == SOF_FW_BOOT_COMPLETE) { hda_dsp_get_registers(sdev, &xoops, &panic_info, stack, HDA_DSP_STACK_DUMP_SIZE); snd_sof_get_status(sdev, status, panic, &xoops, &panic_info, @@ -193,7 +193,7 @@ void hda_dsp_dump(struct snd_sof_dev *sdev, u32 flags) HDA_DSP_SRAM_REG_FW_STATUS); panic = snd_sof_dsp_read(sdev, HDA_DSP_BAR, HDA_DSP_SRAM_REG_FW_TRACEP); - if (sdev->boot_complete) { + if (sdev->fw_state == SOF_FW_BOOT_COMPLETE) { hda_dsp_get_registers(sdev, &xoops, &panic_info, stack, HDA_DSP_STACK_DUMP_SIZE); snd_sof_get_status(sdev, status, panic, &xoops, &panic_info, diff --git a/sound/soc/sof/ipc.c b/sound/soc/sof/ipc.c index 7b6d69783e16..8984d965037d 100644 --- a/sound/soc/sof/ipc.c +++ b/sound/soc/sof/ipc.c @@ -348,19 +348,12 @@ void snd_sof_ipc_msgs_rx(struct snd_sof_dev *sdev) break; case SOF_IPC_FW_READY: /* check for FW boot completion */ - if (!sdev->boot_complete) { + if (sdev->fw_state == SOF_FW_BOOT_IN_PROGRESS) { err = sof_ops(sdev)->fw_ready(sdev, cmd); - if (err < 0) { - /* - * this indicates a mismatch in ABI - * between the driver and fw - */ - dev_err(sdev->dev, "error: ABI mismatch %d\n", - err); - } else { - /* firmware boot completed OK */ - sdev->boot_complete = true; - } + if (err < 0) + sdev->fw_state = SOF_FW_BOOT_READY_FAILED; + else + sdev->fw_state = SOF_FW_BOOT_COMPLETE; /* wake up firmware loader */ wake_up(&sdev->boot_wait); diff --git a/sound/soc/sof/loader.c b/sound/soc/sof/loader.c index a041adf0669d..ce114df5e4fc 100644 --- a/sound/soc/sof/loader.c +++ b/sound/soc/sof/loader.c @@ -511,7 +511,6 @@ int snd_sof_run_firmware(struct snd_sof_dev *sdev) int init_core_mask; init_waitqueue_head(&sdev->boot_wait); - sdev->boot_complete = false; /* create read-only fw_version debugfs to store boot version info */ if (sdev->first_boot) { @@ -543,19 +542,27 @@ int snd_sof_run_firmware(struct snd_sof_dev *sdev) init_core_mask = ret; - /* now wait for the DSP to boot */ - ret = wait_event_timeout(sdev->boot_wait, sdev->boot_complete, + /* + * now wait for the DSP to boot. There are 3 possible outcomes: + * 1. Boot wait times out indicating FW boot failure. + * 2. FW boots successfully and fw_ready op succeeds. + * 3. FW boots but fw_ready op fails. + */ + ret = wait_event_timeout(sdev->boot_wait, + sdev->fw_state > SOF_FW_BOOT_IN_PROGRESS, msecs_to_jiffies(sdev->boot_timeout)); if (ret == 0) { dev_err(sdev->dev, "error: firmware boot failure\n"); snd_sof_dsp_dbg_dump(sdev, SOF_DBG_REGS | SOF_DBG_MBOX | SOF_DBG_TEXT | SOF_DBG_PCI); - /* after this point FW_READY msg should be ignored */ - sdev->boot_complete = true; + sdev->fw_state = SOF_FW_BOOT_FAILED; return -EIO; } - dev_info(sdev->dev, "firmware boot complete\n"); + if (sdev->fw_state == SOF_FW_BOOT_COMPLETE) + dev_info(sdev->dev, "firmware boot complete\n"); + else + return -EIO; /* FW boots but fw_ready op failed */ /* perform post fw run operations */ ret = snd_sof_dsp_post_fw_run(sdev); diff --git a/sound/soc/sof/pm.c b/sound/soc/sof/pm.c index e23beaeefe00..195af259e78e 100644 --- a/sound/soc/sof/pm.c +++ b/sound/soc/sof/pm.c @@ -269,6 +269,10 @@ static int sof_resume(struct device *dev, bool runtime_resume) if (!sof_ops(sdev)->resume || !sof_ops(sdev)->runtime_resume) return 0; + /* DSP was never successfully started, nothing to resume */ + if (sdev->first_boot) + return 0; + /* * if the runtime_resume flag is set, call the runtime_resume routine * or else call the system resume routine @@ -283,6 +287,8 @@ static int sof_resume(struct device *dev, bool runtime_resume) return ret; } + sdev->fw_state = SOF_FW_BOOT_PREPARE; + /* load the firmware */ ret = snd_sof_load_firmware(sdev); if (ret < 0) { @@ -292,7 +298,12 @@ static int sof_resume(struct device *dev, bool runtime_resume) return ret; } - /* boot the firmware */ + sdev->fw_state = SOF_FW_BOOT_IN_PROGRESS; + + /* + * Boot the firmware. The FW boot status will be modified + * in snd_sof_run_firmware() depending on the outcome. + */ ret = snd_sof_run_firmware(sdev); if (ret < 0) { dev_err(sdev->dev, @@ -338,6 +349,9 @@ static int sof_suspend(struct device *dev, bool runtime_suspend) if (!sof_ops(sdev)->suspend) return 0; + if (sdev->fw_state != SOF_FW_BOOT_COMPLETE) + goto power_down; + /* release trace */ snd_sof_release_trace(sdev); @@ -375,6 +389,12 @@ static int sof_suspend(struct device *dev, bool runtime_suspend) ret); } +power_down: + + /* return if the DSP was not probed successfully */ + if (sdev->fw_state == SOF_FW_BOOT_NOT_STARTED) + return 0; + /* power down all DSP cores */ if (runtime_suspend) ret = snd_sof_dsp_runtime_suspend(sdev); @@ -385,6 +405,9 @@ static int sof_suspend(struct device *dev, bool runtime_suspend) "error: failed to power down DSP during suspend %d\n", ret); + /* reset FW state */ + sdev->fw_state = SOF_FW_BOOT_NOT_STARTED; + return ret; } diff --git a/sound/soc/sof/sof-priv.h b/sound/soc/sof/sof-priv.h index 730f3259dd02..7b329bd99674 100644 --- a/sound/soc/sof/sof-priv.h +++ b/sound/soc/sof/sof-priv.h @@ -356,6 +356,15 @@ struct snd_sof_dai { struct list_head list; /* list in sdev dai list */ }; +enum snd_sof_fw_state { + SOF_FW_BOOT_NOT_STARTED = 0, + SOF_FW_BOOT_PREPARE, + SOF_FW_BOOT_IN_PROGRESS, + SOF_FW_BOOT_FAILED, + SOF_FW_BOOT_READY_FAILED, /* firmware booted but fw_ready op failed */ + SOF_FW_BOOT_COMPLETE, +}; + /* * SOF Device Level. */ @@ -372,7 +381,7 @@ struct snd_sof_dev { /* DSP firmware boot */ wait_queue_head_t boot_wait; - u32 boot_complete; + enum snd_sof_fw_state fw_state; u32 first_boot; /* work queue in case the probe is implemented in two steps */ diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 94b903d95afa..74c00c905d24 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -558,11 +558,11 @@ static const struct scarlett2_config /* proprietary request/response format */ struct scarlett2_usb_packet { - u32 cmd; - u16 size; - u16 seq; - u32 error; - u32 pad; + __le32 cmd; + __le16 size; + __le16 seq; + __le32 error; + __le32 pad; u8 data[]; }; @@ -664,11 +664,11 @@ static int scarlett2_usb( "Scarlett Gen 2 USB invalid response; " "cmd tx/rx %d/%d seq %d/%d size %d/%d " "error %d pad %d\n", - le16_to_cpu(req->cmd), le16_to_cpu(resp->cmd), + le32_to_cpu(req->cmd), le32_to_cpu(resp->cmd), le16_to_cpu(req->seq), le16_to_cpu(resp->seq), resp_size, le16_to_cpu(resp->size), - le16_to_cpu(resp->error), - le16_to_cpu(resp->pad)); + le32_to_cpu(resp->error), + le32_to_cpu(resp->pad)); err = -EINVAL; goto unlock; } @@ -687,7 +687,7 @@ error: /* Send SCARLETT2_USB_DATA_CMD SCARLETT2_USB_CONFIG_SAVE */ static void scarlett2_config_save(struct usb_mixer_interface *mixer) { - u32 req = cpu_to_le32(SCARLETT2_USB_CONFIG_SAVE); + __le32 req = cpu_to_le32(SCARLETT2_USB_CONFIG_SAVE); scarlett2_usb(mixer, SCARLETT2_USB_DATA_CMD, &req, sizeof(u32), @@ -713,11 +713,11 @@ static int scarlett2_usb_set_config( const struct scarlett2_config config_item = scarlett2_config_items[config_item_num]; struct { - u32 offset; - u32 bytes; - s32 value; + __le32 offset; + __le32 bytes; + __le32 value; } __packed req; - u32 req2; + __le32 req2; int err; struct scarlett2_mixer_data *private = mixer->private_data; @@ -753,8 +753,8 @@ static int scarlett2_usb_get( int offset, void *buf, int size) { struct { - u32 offset; - u32 size; + __le32 offset; + __le32 size; } __packed req; req.offset = cpu_to_le32(offset); @@ -794,8 +794,8 @@ static int scarlett2_usb_set_mix(struct usb_mixer_interface *mixer, const struct scarlett2_device_info *info = private->info; struct { - u16 mix_num; - u16 data[SCARLETT2_INPUT_MIX_MAX]; + __le16 mix_num; + __le16 data[SCARLETT2_INPUT_MIX_MAX]; } __packed req; int i, j; @@ -850,9 +850,9 @@ static int scarlett2_usb_set_mux(struct usb_mixer_interface *mixer) }; struct { - u16 pad; - u16 num; - u32 data[SCARLETT2_MUX_MAX]; + __le16 pad; + __le16 num; + __le32 data[SCARLETT2_MUX_MAX]; } __packed req; req.pad = 0; @@ -911,9 +911,9 @@ static int scarlett2_usb_get_meter_levels(struct usb_mixer_interface *mixer, u16 *levels) { struct { - u16 pad; - u16 num_meters; - u32 magic; + __le16 pad; + __le16 num_meters; + __le32 magic; } __packed req; u32 resp[SCARLETT2_NUM_METERS]; int i, err; diff --git a/sound/usb/validate.c b/sound/usb/validate.c index 389e8657434a..5a3c4f7882b0 100644 --- a/sound/usb/validate.c +++ b/sound/usb/validate.c @@ -110,7 +110,7 @@ static bool validate_processing_unit(const void *p, default: if (v->type == UAC1_EXTENSION_UNIT) return true; /* OK */ - switch (d->wProcessType) { + switch (le16_to_cpu(d->wProcessType)) { case UAC_PROCESS_UP_DOWNMIX: case UAC_PROCESS_DOLBY_PROLOGIC: if (d->bLength < len + 1) /* bNrModes */ @@ -125,7 +125,7 @@ static bool validate_processing_unit(const void *p, case UAC_VERSION_2: if (v->type == UAC2_EXTENSION_UNIT_V2) return true; /* OK */ - switch (d->wProcessType) { + switch (le16_to_cpu(d->wProcessType)) { case UAC2_PROCESS_UP_DOWNMIX: case UAC2_PROCESS_DOLBY_PROLOCIC: /* SiC! */ if (d->bLength < len + 1) /* bNrModes */ @@ -142,7 +142,7 @@ static bool validate_processing_unit(const void *p, len += 2; /* wClusterDescrID */ break; } - switch (d->wProcessType) { + switch (le16_to_cpu(d->wProcessType)) { case UAC3_PROCESS_UP_DOWNMIX: if (d->bLength < len + 1) /* bNrModes */ return false; diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index ad1b9e646c49..4cf93110c259 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -270,6 +270,7 @@ class ArchX86(Arch): def __init__(self, exit_reasons): self.sc_perf_evt_open = 298 self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reason_field = 'exit_reason' self.exit_reasons = exit_reasons def debugfs_is_child(self, field): @@ -289,6 +290,7 @@ class ArchPPC(Arch): # numbers depend on the wordsize. char_ptr_size = ctypes.sizeof(ctypes.c_char_p) self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 + self.exit_reason_field = 'exit_nr' self.exit_reasons = {} def debugfs_is_child(self, field): @@ -300,6 +302,7 @@ class ArchA64(Arch): def __init__(self): self.sc_perf_evt_open = 241 self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reason_field = 'esr_ec' self.exit_reasons = AARCH64_EXIT_REASONS def debugfs_is_child(self, field): @@ -311,6 +314,7 @@ class ArchS390(Arch): def __init__(self): self.sc_perf_evt_open = 331 self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reason_field = None self.exit_reasons = None def debugfs_is_child(self, field): @@ -541,8 +545,8 @@ class TracepointProvider(Provider): """ filters = {} filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) - if ARCH.exit_reasons: - filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) + if ARCH.exit_reason_field and ARCH.exit_reasons: + filters['kvm_exit'] = (ARCH.exit_reason_field, ARCH.exit_reasons) return filters def _get_available_fields(self): diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index d98838c5820c..b6403712c2f4 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2541,7 +2541,9 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, if (strncmp(local_name, targ_name, local_essent_len) == 0) { pr_debug("[%d] %s: found candidate [%d] %s\n", local_type_id, local_name, i, targ_name); - new_ids = realloc(cand_ids->data, cand_ids->len + 1); + new_ids = reallocarray(cand_ids->data, + cand_ids->len + 1, + sizeof(*cand_ids->data)); if (!new_ids) { err = -ENOMEM; goto err_out; diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh index 0a832e265a50..c3ae1e8ae119 100755 --- a/tools/objtool/sync-check.sh +++ b/tools/objtool/sync-check.sh @@ -47,5 +47,3 @@ check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[ check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"' check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"' check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]"' - -cd - diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c index 2f55d4d23446..6e04304560ca 100644 --- a/tools/power/cpupower/lib/cpufreq.c +++ b/tools/power/cpupower/lib/cpufreq.c @@ -332,21 +332,74 @@ void cpufreq_put_available_governors(struct cpufreq_available_governors *any) } -struct cpufreq_frequencies -*cpufreq_get_frequencies(const char *type, unsigned int cpu) +struct cpufreq_available_frequencies +*cpufreq_get_available_frequencies(unsigned int cpu) { - struct cpufreq_frequencies *first = NULL; - struct cpufreq_frequencies *current = NULL; + struct cpufreq_available_frequencies *first = NULL; + struct cpufreq_available_frequencies *current = NULL; char one_value[SYSFS_PATH_MAX]; char linebuf[MAX_LINE_LEN]; - char fname[MAX_LINE_LEN]; unsigned int pos, i; unsigned int len; - snprintf(fname, MAX_LINE_LEN, "scaling_%s_frequencies", type); + len = sysfs_cpufreq_read_file(cpu, "scaling_available_frequencies", + linebuf, sizeof(linebuf)); + if (len == 0) + return NULL; - len = sysfs_cpufreq_read_file(cpu, fname, - linebuf, sizeof(linebuf)); + pos = 0; + for (i = 0; i < len; i++) { + if (linebuf[i] == ' ' || linebuf[i] == '\n') { + if (i - pos < 2) + continue; + if (i - pos >= SYSFS_PATH_MAX) + goto error_out; + if (current) { + current->next = malloc(sizeof(*current)); + if (!current->next) + goto error_out; + current = current->next; + } else { + first = malloc(sizeof(*first)); + if (!first) + goto error_out; + current = first; + } + current->first = first; + current->next = NULL; + + memcpy(one_value, linebuf + pos, i - pos); + one_value[i - pos] = '\0'; + if (sscanf(one_value, "%lu", ¤t->frequency) != 1) + goto error_out; + + pos = i + 1; + } + } + + return first; + + error_out: + while (first) { + current = first->next; + free(first); + first = current; + } + return NULL; +} + +struct cpufreq_available_frequencies +*cpufreq_get_boost_frequencies(unsigned int cpu) +{ + struct cpufreq_available_frequencies *first = NULL; + struct cpufreq_available_frequencies *current = NULL; + char one_value[SYSFS_PATH_MAX]; + char linebuf[MAX_LINE_LEN]; + unsigned int pos, i; + unsigned int len; + + len = sysfs_cpufreq_read_file(cpu, "scaling_boost_frequencies", + linebuf, sizeof(linebuf)); if (len == 0) return NULL; @@ -391,9 +444,9 @@ struct cpufreq_frequencies return NULL; } -void cpufreq_put_frequencies(struct cpufreq_frequencies *any) +void cpufreq_put_available_frequencies(struct cpufreq_available_frequencies *any) { - struct cpufreq_frequencies *tmp, *next; + struct cpufreq_available_frequencies *tmp, *next; if (!any) return; @@ -406,6 +459,11 @@ void cpufreq_put_frequencies(struct cpufreq_frequencies *any) } } +void cpufreq_put_boost_frequencies(struct cpufreq_available_frequencies *any) +{ + cpufreq_put_available_frequencies(any); +} + static struct cpufreq_affected_cpus *sysfs_get_cpu_list(unsigned int cpu, const char *file) { diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h index a55f0d19215b..95f4fd9e2656 100644 --- a/tools/power/cpupower/lib/cpufreq.h +++ b/tools/power/cpupower/lib/cpufreq.h @@ -20,10 +20,10 @@ struct cpufreq_available_governors { struct cpufreq_available_governors *first; }; -struct cpufreq_frequencies { +struct cpufreq_available_frequencies { unsigned long frequency; - struct cpufreq_frequencies *next; - struct cpufreq_frequencies *first; + struct cpufreq_available_frequencies *next; + struct cpufreq_available_frequencies *first; }; @@ -124,11 +124,17 @@ void cpufreq_put_available_governors( * cpufreq_put_frequencies after use. */ -struct cpufreq_frequencies -*cpufreq_get_frequencies(const char *type, unsigned int cpu); +struct cpufreq_available_frequencies +*cpufreq_get_available_frequencies(unsigned int cpu); -void cpufreq_put_frequencies( - struct cpufreq_frequencies *first); +void cpufreq_put_available_frequencies( + struct cpufreq_available_frequencies *first); + +struct cpufreq_available_frequencies +*cpufreq_get_boost_frequencies(unsigned int cpu); + +void cpufreq_put_boost_frequencies( + struct cpufreq_available_frequencies *first); /* determine affected CPUs diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index e63cf55f81cf..6efc0f6b1b11 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -244,14 +244,14 @@ static int get_boost_mode_x86(unsigned int cpu) static int get_boost_mode(unsigned int cpu) { - struct cpufreq_frequencies *freqs; + struct cpufreq_available_frequencies *freqs; if (cpupower_cpu_info.vendor == X86_VENDOR_AMD || cpupower_cpu_info.vendor == X86_VENDOR_HYGON || cpupower_cpu_info.vendor == X86_VENDOR_INTEL) return get_boost_mode_x86(cpu); - freqs = cpufreq_get_frequencies("boost", cpu); + freqs = cpufreq_get_boost_frequencies(cpu); if (freqs) { printf(_(" boost frequency steps: ")); while (freqs->next) { @@ -261,7 +261,7 @@ static int get_boost_mode(unsigned int cpu) } print_speed(freqs->frequency); printf("\n"); - cpufreq_put_frequencies(freqs); + cpufreq_put_available_frequencies(freqs); } return 0; @@ -475,7 +475,7 @@ static int get_latency(unsigned int cpu, unsigned int human) static void debug_output_one(unsigned int cpu) { - struct cpufreq_frequencies *freqs; + struct cpufreq_available_frequencies *freqs; get_driver(cpu); get_related_cpus(cpu); @@ -483,7 +483,7 @@ static void debug_output_one(unsigned int cpu) get_latency(cpu, 1); get_hardware_limits(cpu, 1); - freqs = cpufreq_get_frequencies("available", cpu); + freqs = cpufreq_get_available_frequencies(cpu); if (freqs) { printf(_(" available frequency steps: ")); while (freqs->next) { @@ -493,7 +493,7 @@ static void debug_output_one(unsigned int cpu) } print_speed(freqs->frequency); printf("\n"); - cpufreq_put_frequencies(freqs); + cpufreq_put_available_frequencies(freqs); } get_available_governors(cpu); diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 5ecc267d98b0..fad615c22e4d 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -2,7 +2,7 @@ #include ssize_t get_base_addr() { - size_t start; + size_t start, offset; char buf[256]; FILE *f; @@ -10,10 +10,11 @@ ssize_t get_base_addr() { if (!f) return -errno; - while (fscanf(f, "%zx-%*x %s %*s\n", &start, buf) == 2) { + while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n", + &start, buf, &offset) == 3) { if (strcmp(buf, "r-xp") == 0) { fclose(f); - return start; + return start - offset; } } diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index 3003fddc0613..cf6c87936c69 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -4,6 +4,7 @@ #include #include #include +#include "libbpf_internal.h" static void on_sample(void *ctx, int cpu, void *data, __u32 size) { @@ -19,7 +20,7 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size) void test_perf_buffer(void) { - int err, prog_fd, nr_cpus, i, duration = 0; + int err, prog_fd, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0; const char *prog_name = "kprobe/sys_nanosleep"; const char *file = "./test_perf_buffer.o"; struct perf_buffer_opts pb_opts = {}; @@ -29,15 +30,27 @@ void test_perf_buffer(void) struct bpf_object *obj; struct perf_buffer *pb; struct bpf_link *link; + bool *online; nr_cpus = libbpf_num_possible_cpus(); if (CHECK(nr_cpus < 0, "nr_cpus", "err %d\n", nr_cpus)) return; + err = parse_cpu_mask_file("/sys/devices/system/cpu/online", + &online, &on_len); + if (CHECK(err, "nr_on_cpus", "err %d\n", err)) + return; + + for (i = 0; i < on_len; i++) + if (online[i]) + nr_on_cpus++; + /* load program */ err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd); - if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) - return; + if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) { + obj = NULL; + goto out_close; + } prog = bpf_object__find_program_by_title(obj, prog_name); if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name)) @@ -64,6 +77,11 @@ void test_perf_buffer(void) /* trigger kprobe on every CPU */ CPU_ZERO(&cpu_seen); for (i = 0; i < nr_cpus; i++) { + if (i >= on_len || !online[i]) { + printf("skipping offline CPU #%d\n", i); + continue; + } + CPU_ZERO(&cpu_set); CPU_SET(i, &cpu_set); @@ -81,8 +99,8 @@ void test_perf_buffer(void) if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err)) goto out_free_pb; - if (CHECK(CPU_COUNT(&cpu_seen) != nr_cpus, "seen_cpu_cnt", - "expect %d, seen %d\n", nr_cpus, CPU_COUNT(&cpu_seen))) + if (CHECK(CPU_COUNT(&cpu_seen) != nr_on_cpus, "seen_cpu_cnt", + "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen))) goto out_free_pb; out_free_pb: @@ -91,4 +109,5 @@ out_detach: bpf_link__destroy(link); out_close: bpf_object__close(obj); + free(online); } diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index f62aa0eb959b..1735faf17536 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -49,8 +49,12 @@ retry: pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu 0 */, -1 /* group id */, 0 /* flags */); - if (CHECK(pmu_fd < 0, "perf_event_open", - "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n", + if (pmu_fd < 0 && errno == ENOENT) { + printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); + test__skip(); + goto cleanup; + } + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd, errno)) goto close_prog; diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c index ea7d84f01235..e6be383a003f 100644 --- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c +++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c @@ -113,6 +113,12 @@ int _select_by_skb_data(struct sk_reuseport_md *reuse_md) data_check.skb_ports[0] = th->source; data_check.skb_ports[1] = th->dest; + if (th->fin) + /* The connection is being torn down at the end of a + * test. It can't contain a cmd, so return early. + */ + return SK_PASS; + if ((th->doff << 2) + sizeof(*cmd) > data_check.len) GOTO_DONE(DROP_ERR_SKB_DATA); if (bpf_skb_load_bytes(reuse_md, th->doff << 2, &cmd_copy, diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 4a851513c842..779e11da979c 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -331,7 +331,7 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, FILE *file; int i, fp; - file = fopen(".sendpage_tst.tmp", "w+"); + file = tmpfile(); if (!file) { perror("create file for sendpage"); return 1; @@ -340,13 +340,8 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, fwrite(&k, sizeof(char), 1, file); fflush(file); fseek(file, 0, SEEK_SET); - fclose(file); - fp = open(".sendpage_tst.tmp", O_RDONLY); - if (fp < 0) { - perror("reopen file for sendpage"); - return 1; - } + fp = fileno(file); clock_gettime(CLOCK_MONOTONIC, &s->start); for (i = 0; i < cnt; i++) { @@ -354,11 +349,11 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, if (!drop && sent < 0) { perror("send loop error"); - close(fp); + fclose(file); return sent; } else if (drop && sent >= 0) { printf("sendpage loop error expected: %i\n", sent); - close(fp); + fclose(file); return -EIO; } @@ -366,7 +361,7 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, s->bytes_sent += sent; } clock_gettime(CLOCK_MONOTONIC, &s->end); - close(fp); + fclose(file); return 0; } diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py index e98c36750fae..d34fe06268d2 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py @@ -54,7 +54,7 @@ class SubPlugin(TdcPlugin): shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - env=ENVIR) + env=os.environ.copy()) (rawout, serr) = proc.communicate() if proc.returncode != 0 and len(serr) > 0: diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index c4c57ba99e90..631d397ac81b 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c @@ -10,6 +10,7 @@ * Author: Christoffer Dall */ +#include #include #include #include @@ -28,25 +29,115 @@ static const u8 return_offsets[8][2] = { [7] = { 4, 4 }, /* FIQ, unused */ }; +/* + * When an exception is taken, most CPSR fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). + * + * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with + * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was + * obsoleted by the ARMv7 virtualization extensions and is RES0. + * + * For the SPSR layout seen from AArch32, see: + * - ARM DDI 0406C.d, page B1-1148 + * - ARM DDI 0487E.a, page G8-6264 + * + * For the SPSR_ELx layout for AArch32 seen from AArch64, see: + * - ARM DDI 0487E.a, page C5-426 + * + * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from + * MSB to LSB. + */ +static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode) +{ + u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_AA32_N_BIT); + new |= (old & PSR_AA32_Z_BIT); + new |= (old & PSR_AA32_C_BIT); + new |= (old & PSR_AA32_V_BIT); + new |= (old & PSR_AA32_Q_BIT); + + // CPSR.IT[7:0] are set to zero upon any exception + // See ARM DDI 0487E.a, section G1.12.3 + // See ARM DDI 0406C.d, section B1.8.3 + + new |= (old & PSR_AA32_DIT_BIT); + + // CPSR.SSBS is set to SCTLR.DSSBS upon any exception + // See ARM DDI 0487E.a, page G8-6244 + if (sctlr & BIT(31)) + new |= PSR_AA32_SSBS_BIT; + + // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0 + // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page G8-6246 + new |= (old & PSR_AA32_PAN_BIT); + if (!(sctlr & BIT(23))) + new |= PSR_AA32_PAN_BIT; + + // SS does not exist in AArch32, so ignore + + // CPSR.IL is set to zero upon any exception + // See ARM DDI 0487E.a, page G1-5527 + + new |= (old & PSR_AA32_GE_MASK); + + // CPSR.IT[7:0] are set to zero upon any exception + // See prior comment above + + // CPSR.E is set to SCTLR.EE upon any exception + // See ARM DDI 0487E.a, page G8-6245 + // See ARM DDI 0406C.d, page B4-1701 + if (sctlr & BIT(25)) + new |= PSR_AA32_E_BIT; + + // CPSR.A is unchanged upon an exception to Undefined, Supervisor + // CPSR.A is set upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_A_BIT); + if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC) + new |= PSR_AA32_A_BIT; + + // CPSR.I is set upon any exception + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= PSR_AA32_I_BIT; + + // CPSR.F is set upon an exception to FIQ + // CPSR.F is unchanged upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_F_BIT); + if (mode == PSR_AA32_MODE_FIQ) + new |= PSR_AA32_F_BIT; + + // CPSR.T is set to SCTLR.TE upon any exception + // See ARM DDI 0487E.a, page G8-5514 + // See ARM DDI 0406C.d, page B1-1181 + if (sctlr & BIT(30)) + new |= PSR_AA32_T_BIT; + + new |= mode; + + return new; +} + static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) { - unsigned long cpsr; - unsigned long new_spsr_value = *vcpu_cpsr(vcpu); - bool is_thumb = (new_spsr_value & PSR_AA32_T_BIT); + unsigned long spsr = *vcpu_cpsr(vcpu); + bool is_thumb = (spsr & PSR_AA32_T_BIT); u32 return_offset = return_offsets[vect_offset >> 2][is_thumb]; u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); - cpsr = mode | PSR_AA32_I_BIT; - - if (sctlr & (1 << 30)) - cpsr |= PSR_AA32_T_BIT; - if (sctlr & (1 << 25)) - cpsr |= PSR_AA32_E_BIT; - - *vcpu_cpsr(vcpu) = cpsr; + *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode); /* Note: These now point to the banked copies */ - vcpu_write_spsr(vcpu, new_spsr_value); + vcpu_write_spsr(vcpu, host_spsr_to_spsr32(spsr)); *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; /* Branch to exception vector */ @@ -84,7 +175,7 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, fsr = &vcpu_cp15(vcpu, c5_DFSR); } - prepare_fault32(vcpu, PSR_AA32_MODE_ABT | PSR_AA32_A_BIT, vect_offset); + prepare_fault32(vcpu, PSR_AA32_MODE_ABT, vect_offset); *far = addr; diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c index 6af5c91337f2..f274fabb4301 100644 --- a/virt/kvm/arm/mmio.c +++ b/virt/kvm/arm/mmio.c @@ -105,6 +105,9 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) data = (data ^ mask) - mask; } + if (!vcpu->arch.mmio_decode.sixty_four) + data = data & 0xffffffff; + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, &data); data = vcpu_data_host_to_guest(vcpu, data, len); @@ -125,6 +128,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) unsigned long rt; int access_size; bool sign_extend; + bool sixty_four; if (kvm_vcpu_dabt_iss1tw(vcpu)) { /* page table accesses IO mem: tell guest to fix its TTBR */ @@ -138,11 +142,13 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) *is_write = kvm_vcpu_dabt_iswrite(vcpu); sign_extend = kvm_vcpu_dabt_issext(vcpu); + sixty_four = kvm_vcpu_dabt_issf(vcpu); rt = kvm_vcpu_dabt_get_rd(vcpu); *len = access_size; vcpu->arch.mmio_decode.sign_extend = sign_extend; vcpu->arch.mmio_decode.rt = rt; + vcpu->arch.mmio_decode.sixty_four = sixty_four; return 0; } diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 35305d6e68cc..d8ef708a2ef6 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -64,7 +64,7 @@ static void async_pf_execute(struct work_struct *work) struct mm_struct *mm = apf->mm; struct kvm_vcpu *vcpu = apf->vcpu; unsigned long addr = apf->addr; - gva_t gva = apf->gva; + gpa_t cr2_or_gpa = apf->cr2_or_gpa; int locked = 1; might_sleep(); @@ -92,7 +92,7 @@ static void async_pf_execute(struct work_struct *work) * this point */ - trace_kvm_async_pf_completed(addr, gva); + trace_kvm_async_pf_completed(addr, cr2_or_gpa); if (swq_has_sleeper(&vcpu->wq)) swake_up_one(&vcpu->wq); @@ -165,8 +165,8 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) } } -int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, - struct kvm_arch_async_pf *arch) +int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + unsigned long hva, struct kvm_arch_async_pf *arch) { struct kvm_async_pf *work; @@ -185,7 +185,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, work->wakeup_all = false; work->vcpu = vcpu; - work->gva = gva; + work->cr2_or_gpa = cr2_or_gpa; work->addr = hva; work->arch = *arch; work->mm = current->mm; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 13efc291b1c7..b5ea1bafe513 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1394,14 +1394,14 @@ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); -unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn) +unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn) { struct vm_area_struct *vma; unsigned long addr, size; size = PAGE_SIZE; - addr = gfn_to_hva(kvm, gfn); + addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gfn, NULL); if (kvm_is_error_hva(addr)) return PAGE_SIZE; @@ -1809,26 +1809,72 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_page); -static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn, - struct kvm_host_map *map) +void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache) +{ + if (pfn == 0) + return; + + if (cache) + cache->pfn = cache->gfn = 0; + + if (dirty) + kvm_release_pfn_dirty(pfn); + else + kvm_release_pfn_clean(pfn); +} + +static void kvm_cache_gfn_to_pfn(struct kvm_memory_slot *slot, gfn_t gfn, + struct gfn_to_pfn_cache *cache, u64 gen) +{ + kvm_release_pfn(cache->pfn, cache->dirty, cache); + + cache->pfn = gfn_to_pfn_memslot(slot, gfn); + cache->gfn = gfn; + cache->dirty = false; + cache->generation = gen; +} + +static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn, + struct kvm_host_map *map, + struct gfn_to_pfn_cache *cache, + bool atomic) { kvm_pfn_t pfn; void *hva = NULL; struct page *page = KVM_UNMAPPED_PAGE; + struct kvm_memory_slot *slot = __gfn_to_memslot(slots, gfn); + u64 gen = slots->generation; if (!map) return -EINVAL; - pfn = gfn_to_pfn_memslot(slot, gfn); + if (cache) { + if (!cache->pfn || cache->gfn != gfn || + cache->generation != gen) { + if (atomic) + return -EAGAIN; + kvm_cache_gfn_to_pfn(slot, gfn, cache, gen); + } + pfn = cache->pfn; + } else { + if (atomic) + return -EAGAIN; + pfn = gfn_to_pfn_memslot(slot, gfn); + } if (is_error_noslot_pfn(pfn)) return -EINVAL; if (pfn_valid(pfn)) { page = pfn_to_page(pfn); - hva = kmap(page); + if (atomic) + hva = kmap_atomic(page); + else + hva = kmap(page); #ifdef CONFIG_HAS_IOMEM - } else { + } else if (!atomic) { hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB); + } else { + return -EINVAL; #endif } @@ -1843,14 +1889,25 @@ static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn, return 0; } +int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map, + struct gfn_to_pfn_cache *cache, bool atomic) +{ + return __kvm_map_gfn(kvm_memslots(vcpu->kvm), gfn, map, + cache, atomic); +} +EXPORT_SYMBOL_GPL(kvm_map_gfn); + int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map) { - return __kvm_map_gfn(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, map); + return __kvm_map_gfn(kvm_vcpu_memslots(vcpu), gfn, map, + NULL, false); } EXPORT_SYMBOL_GPL(kvm_vcpu_map); -void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, - bool dirty) +static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot, + struct kvm_host_map *map, + struct gfn_to_pfn_cache *cache, + bool dirty, bool atomic) { if (!map) return; @@ -1858,23 +1915,45 @@ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, if (!map->hva) return; - if (map->page != KVM_UNMAPPED_PAGE) - kunmap(map->page); + if (map->page != KVM_UNMAPPED_PAGE) { + if (atomic) + kunmap_atomic(map->hva); + else + kunmap(map->page); + } #ifdef CONFIG_HAS_IOMEM - else + else if (!atomic) memunmap(map->hva); + else + WARN_ONCE(1, "Unexpected unmapping in atomic context"); #endif - if (dirty) { - kvm_vcpu_mark_page_dirty(vcpu, map->gfn); - kvm_release_pfn_dirty(map->pfn); - } else { - kvm_release_pfn_clean(map->pfn); - } + if (dirty) + mark_page_dirty_in_slot(memslot, map->gfn); + + if (cache) + cache->dirty |= dirty; + else + kvm_release_pfn(map->pfn, dirty, NULL); map->hva = NULL; map->page = NULL; } + +int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, + struct gfn_to_pfn_cache *cache, bool dirty, bool atomic) +{ + __kvm_unmap_gfn(gfn_to_memslot(vcpu->kvm, map->gfn), map, + cache, dirty, atomic); + return 0; +} +EXPORT_SYMBOL_GPL(kvm_unmap_gfn); + +void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty) +{ + __kvm_unmap_gfn(kvm_vcpu_gfn_to_memslot(vcpu, map->gfn), map, NULL, + dirty, false); +} EXPORT_SYMBOL_GPL(kvm_vcpu_unmap); struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)