diff --git a/Makefile b/Makefile index 7ab22f105a032..fb2937bca41b3 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 46 +SUBLEVEL = 47 EXTRAVERSION = NAME = Dare mighty things diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index f90479d8b50c8..b06602cea99c7 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -544,9 +544,11 @@ void notrace cpu_init(void) * In Thumb-2, msr with an immediate value is not allowed. */ #ifdef CONFIG_THUMB2_KERNEL -#define PLC "r" +#define PLC_l "l" +#define PLC_r "r" #else -#define PLC "I" +#define PLC_l "I" +#define PLC_r "I" #endif /* @@ -568,15 +570,15 @@ void notrace cpu_init(void) "msr cpsr_c, %9" : : "r" (stk), - PLC (PSR_F_BIT | PSR_I_BIT | IRQ_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | IRQ_MODE), "I" (offsetof(struct stack, irq[0])), - PLC (PSR_F_BIT | PSR_I_BIT | ABT_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | ABT_MODE), "I" (offsetof(struct stack, abt[0])), - PLC (PSR_F_BIT | PSR_I_BIT | UND_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | UND_MODE), "I" (offsetof(struct stack, und[0])), - PLC (PSR_F_BIT | PSR_I_BIT | FIQ_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | FIQ_MODE), "I" (offsetof(struct stack, fiq[0])), - PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE) + PLC_l (PSR_F_BIT | PSR_I_BIT | SVC_MODE) : "r14"); #endif } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index a985d292e8203..c0a7f0d90b39d 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -174,14 +174,21 @@ static void __init reserve_elfcorehdr(void) #endif /* CONFIG_CRASH_DUMP */ /* - * Return the maximum physical address for a zone with a given address size - * limit. It currently assumes that for memory starting above 4G, 32-bit - * devices will use a DMA offset. + * Return the maximum physical address for a zone accessible by the given bits + * limit. If DRAM starts above 32-bit, expand the zone to the maximum + * available memory, otherwise cap it at 32-bit. */ static phys_addr_t __init max_zone_phys(unsigned int zone_bits) { - phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, zone_bits); - return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM()); + phys_addr_t zone_mask = DMA_BIT_MASK(zone_bits); + phys_addr_t phys_start = memblock_start_of_DRAM(); + + if (phys_start > U32_MAX) + zone_mask = PHYS_ADDR_MAX; + else if (phys_start > zone_mask) + zone_mask = U32_MAX; + + return min(zone_mask, memblock_end_of_DRAM() - 1) + 1; } static void __init zone_sizes_init(unsigned long min, unsigned long max) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index afdad76078506..58dc93e566179 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -469,6 +469,21 @@ void __init mark_linear_text_alias_ro(void) PAGE_KERNEL_RO); } +static bool crash_mem_map __initdata; + +static int __init enable_crash_mem_map(char *arg) +{ + /* + * Proper parameter parsing is done by reserve_crashkernel(). We only + * need to know if the linear map has to avoid block mappings so that + * the crashkernel reservations can be unmapped later. 
+ */ + crash_mem_map = true; + + return 0; +} +early_param("crashkernel", enable_crash_mem_map); + static void __init map_mem(pgd_t *pgdp) { phys_addr_t kernel_start = __pa_symbol(_text); @@ -477,7 +492,7 @@ static void __init map_mem(pgd_t *pgdp) int flags = 0; u64 i; - if (rodata_full || debug_pagealloc_enabled()) + if (rodata_full || crash_mem_map || debug_pagealloc_enabled()) flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* @@ -487,11 +502,6 @@ static void __init map_mem(pgd_t *pgdp) * the following for-loop */ memblock_mark_nomap(kernel_start, kernel_end - kernel_start); -#ifdef CONFIG_KEXEC_CORE - if (crashk_res.end) - memblock_mark_nomap(crashk_res.start, - resource_size(&crashk_res)); -#endif /* map all the memory banks */ for_each_mem_range(i, &start, &end) { @@ -519,21 +529,6 @@ static void __init map_mem(pgd_t *pgdp) __map_memblock(pgdp, kernel_start, kernel_end, PAGE_KERNEL, NO_CONT_MAPPINGS); memblock_clear_nomap(kernel_start, kernel_end - kernel_start); - -#ifdef CONFIG_KEXEC_CORE - /* - * Use page-level mappings here so that we can shrink the region - * in page granularity and put back unused memory to buddy system - * through /sys/kernel/kexec_crash_size interface. - */ - if (crashk_res.end) { - __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1, - PAGE_KERNEL, - NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); - memblock_clear_nomap(crashk_res.start, - resource_size(&crashk_res)); - } -#endif } void mark_rodata_ro(void) diff --git a/arch/mips/generic/board-boston.its.S b/arch/mips/generic/board-boston.its.S index a7f51f97b9102..c45ad27594218 100644 --- a/arch/mips/generic/board-boston.its.S +++ b/arch/mips/generic/board-boston.its.S @@ -1,22 +1,22 @@ / { images { - fdt@boston { + fdt-boston { description = "img,boston Device Tree"; data = /incbin/("boot/dts/img/boston.dtb"); type = "flat_dt"; arch = "mips"; compression = "none"; - hash@0 { + hash { algo = "sha1"; }; }; }; configurations { - conf@boston { + conf-boston { description = "Boston Linux kernel"; - kernel = "kernel@0"; - fdt = "fdt@boston"; + kernel = "kernel"; + fdt = "fdt-boston"; }; }; }; diff --git a/arch/mips/generic/board-ni169445.its.S b/arch/mips/generic/board-ni169445.its.S index e4cb4f95a8cc1..0a2e8f7a8526f 100644 --- a/arch/mips/generic/board-ni169445.its.S +++ b/arch/mips/generic/board-ni169445.its.S @@ -1,22 +1,22 @@ / { images { - fdt@ni169445 { + fdt-ni169445 { description = "NI 169445 device tree"; data = /incbin/("boot/dts/ni/169445.dtb"); type = "flat_dt"; arch = "mips"; compression = "none"; - hash@0 { + hash { algo = "sha1"; }; }; }; configurations { - conf@ni169445 { + conf-ni169445 { description = "NI 169445 Linux Kernel"; - kernel = "kernel@0"; - fdt = "fdt@ni169445"; + kernel = "kernel"; + fdt = "fdt-ni169445"; }; }; }; diff --git a/arch/mips/generic/board-ocelot.its.S b/arch/mips/generic/board-ocelot.its.S index 3da23988149a6..8c7e3a1b68d3d 100644 --- a/arch/mips/generic/board-ocelot.its.S +++ b/arch/mips/generic/board-ocelot.its.S @@ -1,40 +1,40 @@ /* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ / { images { - fdt@ocelot_pcb123 { + fdt-ocelot_pcb123 { description = "MSCC Ocelot PCB123 Device Tree"; data = /incbin/("boot/dts/mscc/ocelot_pcb123.dtb"); type = "flat_dt"; arch = "mips"; compression = "none"; - hash@0 { + hash { algo = "sha1"; }; }; - fdt@ocelot_pcb120 { + fdt-ocelot_pcb120 { description = "MSCC Ocelot PCB120 Device Tree"; data = /incbin/("boot/dts/mscc/ocelot_pcb120.dtb"); type = "flat_dt"; arch = "mips"; compression = "none"; - hash@0 { + hash { algo = "sha1"; }; }; }; 
configurations { - conf@ocelot_pcb123 { + conf-ocelot_pcb123 { description = "Ocelot Linux kernel"; - kernel = "kernel@0"; - fdt = "fdt@ocelot_pcb123"; + kernel = "kernel"; + fdt = "fdt-ocelot_pcb123"; }; - conf@ocelot_pcb120 { + conf-ocelot_pcb120 { description = "Ocelot Linux kernel"; - kernel = "kernel@0"; - fdt = "fdt@ocelot_pcb120"; + kernel = "kernel"; + fdt = "fdt-ocelot_pcb120"; }; }; }; diff --git a/arch/mips/generic/board-xilfpga.its.S b/arch/mips/generic/board-xilfpga.its.S index a2e773d3f14f4..08c1e900eb4ed 100644 --- a/arch/mips/generic/board-xilfpga.its.S +++ b/arch/mips/generic/board-xilfpga.its.S @@ -1,22 +1,22 @@ / { images { - fdt@xilfpga { + fdt-xilfpga { description = "MIPSfpga (xilfpga) Device Tree"; data = /incbin/("boot/dts/xilfpga/nexys4ddr.dtb"); type = "flat_dt"; arch = "mips"; compression = "none"; - hash@0 { + hash { algo = "sha1"; }; }; }; configurations { - conf@xilfpga { + conf-xilfpga { description = "MIPSfpga Linux kernel"; - kernel = "kernel@0"; - fdt = "fdt@xilfpga"; + kernel = "kernel"; + fdt = "fdt-xilfpga"; }; }; }; diff --git a/arch/mips/generic/vmlinux.its.S b/arch/mips/generic/vmlinux.its.S index 1a08438fd8930..3e254676540f4 100644 --- a/arch/mips/generic/vmlinux.its.S +++ b/arch/mips/generic/vmlinux.its.S @@ -6,7 +6,7 @@ #address-cells = <ADDR_CELLS>; images { - kernel@0 { + kernel { description = KERNEL_NAME; data = /incbin/(VMLINUX_BINARY); type = "kernel"; @@ -15,18 +15,18 @@ compression = VMLINUX_COMPRESSION; load = /bits/ ADDR_BITS <VMLINUX_LOAD_ADDRESS>; entry = /bits/ ADDR_BITS <VMLINUX_ENTRY_ADDRESS>; - hash@0 { + hash { algo = "sha1"; }; }; }; configurations { - default = "conf@default"; + default = "conf-default"; - conf@default { + conf-default { description = "Generic Linux kernel"; - kernel = "kernel@0"; + kernel = "kernel"; }; }; }; diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index e241e0e85ac81..226c366072da3 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -14,7 +14,7 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y) LDFLAGS_vmlinux := --no-relax endif -ifeq ($(CONFIG_64BIT)$(CONFIG_CMODEL_MEDLOW),yy) +ifeq ($(CONFIG_CMODEL_MEDLOW),y) KBUILD_CFLAGS_MODULE += -mcmodel=medany endif diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index ee056f4a4fa30..ee582896b6a3f 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -90,12 +90,16 @@ struct stack_frame { CALL_ARGS_4(arg1, arg2, arg3, arg4); \ register unsigned long r4 asm("6") = (unsigned long)(arg5) -#define CALL_FMT_0 "=&d" (r2) : -#define CALL_FMT_1 "+&d" (r2) : -#define CALL_FMT_2 CALL_FMT_1 "d" (r3), -#define CALL_FMT_3 CALL_FMT_2 "d" (r4), -#define CALL_FMT_4 CALL_FMT_3 "d" (r5), -#define CALL_FMT_5 CALL_FMT_4 "d" (r6), +/* + * To keep this simple mark register 2-6 as being changed (volatile) + * by the called function, even though register 6 is saved/nonvolatile.
+ */ +#define CALL_FMT_0 "=&d" (r2) +#define CALL_FMT_1 "+&d" (r2) +#define CALL_FMT_2 CALL_FMT_1, "+&d" (r3) +#define CALL_FMT_3 CALL_FMT_2, "+&d" (r4) +#define CALL_FMT_4 CALL_FMT_3, "+&d" (r5) +#define CALL_FMT_5 CALL_FMT_4, "+&d" (r6) #define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory" #define CALL_CLOBBER_4 CALL_CLOBBER_5 @@ -117,7 +121,7 @@ struct stack_frame { " brasl 14,%[_fn]\n" \ " la 15,0(%[_prev])\n" \ : [_prev] "=&a" (prev), CALL_FMT_##nr \ - [_stack] "R" (stack), \ + : [_stack] "R" (stack), \ [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ [_frame] "d" (frame), \ [_fn] "X" (fn) : CALL_CLOBBER_##nr); \ diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 2e4d91f3feea4..93a3122cd15ff 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -127,8 +127,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) /* User code screwed up. */ regs->ax = -EFAULT; - instrumentation_end(); local_irq_disable(); + instrumentation_end(); irqentry_exit_to_user_mode(regs); return false; } diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index a88c94d656931..e6db1a1f22d7d 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -45,9 +45,11 @@ #include "perf_event.h" struct x86_pmu x86_pmu __read_mostly; +static struct pmu pmu; DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, + .pmu = &pmu, }; DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key); @@ -372,10 +374,12 @@ int x86_reserve_hardware(void) if (!atomic_inc_not_zero(&pmc_refcount)) { mutex_lock(&pmc_reserve_mutex); if (atomic_read(&pmc_refcount) == 0) { - if (!reserve_pmc_hardware()) + if (!reserve_pmc_hardware()) { err = -EBUSY; - else + } else { reserve_ds_buffers(); + reserve_lbr_buffers(); + } } if (!err) atomic_inc(&pmc_refcount); @@ -710,16 +714,23 @@ void x86_pmu_enable_all(int added) } } -static struct pmu pmu; - static inline int is_x86_event(struct perf_event *event) { return event->pmu == &pmu; } -struct pmu *x86_get_pmu(void) +struct pmu *x86_get_pmu(unsigned int cpu) { - return &pmu; + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + + /* + * All CPUs of the hybrid type have been offline. + * The x86_get_pmu() should not be invoked. 
+ */ + if (WARN_ON_ONCE(!cpuc->pmu)) + return &pmu; + + return cpuc->pmu; } /* * Event scheduler state: diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index ee659b5faf714..3b8b8eede1a8a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4747,7 +4747,7 @@ static void update_tfa_sched(void *ignored) * and if so force schedule out for all event types all contexts */ if (test_bit(3, cpuc->active_mask)) - perf_pmu_resched(x86_get_pmu()); + perf_pmu_resched(x86_get_pmu(smp_processor_id())); } static ssize_t show_sysctl_tfa(struct device *cdev, diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 31a7a6566d077..945d470f62d0f 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2076,7 +2076,7 @@ void __init intel_ds_init(void) PERF_SAMPLE_TIME; x86_pmu.flags |= PMU_FL_PEBS_ALL; pebs_qual = "-baseline"; - x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; + x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; } else { /* Only basic record supported */ x86_pmu.large_pebs_flags &= @@ -2091,7 +2091,7 @@ void __init intel_ds_init(void) if (x86_pmu.intel_cap.pebs_output_pt_available) { pr_cont("PEBS-via-PT, "); - x86_get_pmu()->capabilities |= PERF_PMU_CAP_AUX_OUTPUT; + x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT; } break; diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index e2b0efcba1017..9c1a013d56822 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -658,7 +658,6 @@ static inline bool branch_user_callstack(unsigned br_sel) void intel_pmu_lbr_add(struct perf_event *event) { - struct kmem_cache *kmem_cache = event->pmu->task_ctx_cache; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!x86_pmu.lbr_nr) @@ -696,16 +695,11 @@ void intel_pmu_lbr_add(struct perf_event *event) perf_sched_cb_inc(event->ctx->pmu); if (!cpuc->lbr_users++ && !event->total_time_running) intel_pmu_lbr_reset(); - - if (static_cpu_has(X86_FEATURE_ARCH_LBR) && - kmem_cache && !cpuc->lbr_xsave && - (cpuc->lbr_users != cpuc->lbr_pebs_users)) - cpuc->lbr_xsave = kmem_cache_alloc(kmem_cache, GFP_KERNEL); } void release_lbr_buffers(void) { - struct kmem_cache *kmem_cache = x86_get_pmu()->task_ctx_cache; + struct kmem_cache *kmem_cache; struct cpu_hw_events *cpuc; int cpu; @@ -714,6 +708,7 @@ void release_lbr_buffers(void) for_each_possible_cpu(cpu) { cpuc = per_cpu_ptr(&cpu_hw_events, cpu); + kmem_cache = x86_get_pmu(cpu)->task_ctx_cache; if (kmem_cache && cpuc->lbr_xsave) { kmem_cache_free(kmem_cache, cpuc->lbr_xsave); cpuc->lbr_xsave = NULL; @@ -721,6 +716,27 @@ void release_lbr_buffers(void) } } +void reserve_lbr_buffers(void) +{ + struct kmem_cache *kmem_cache; + struct cpu_hw_events *cpuc; + int cpu; + + if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) + return; + + for_each_possible_cpu(cpu) { + cpuc = per_cpu_ptr(&cpu_hw_events, cpu); + kmem_cache = x86_get_pmu(cpu)->task_ctx_cache; + if (!kmem_cache || cpuc->lbr_xsave) + continue; + + cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, + GFP_KERNEL | __GFP_ZERO, + cpu_to_node(cpu)); + } +} + void intel_pmu_lbr_del(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1609,7 +1625,7 @@ void intel_pmu_lbr_init_hsw(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = hsw_lbr_sel_map; - x86_get_pmu()->task_ctx_cache = create_lbr_kmem_cache(size, 0); + x86_get_pmu(smp_processor_id())->task_ctx_cache = 
create_lbr_kmem_cache(size, 0); if (lbr_from_signext_quirk_needed()) static_branch_enable(&lbr_from_quirk_key); @@ -1629,7 +1645,7 @@ __init void intel_pmu_lbr_init_skl(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = hsw_lbr_sel_map; - x86_get_pmu()->task_ctx_cache = create_lbr_kmem_cache(size, 0); + x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0); /* * SW branch filter usage: @@ -1726,7 +1742,7 @@ static bool is_arch_lbr_xsave_available(void) void __init intel_pmu_arch_lbr_init(void) { - struct pmu *pmu = x86_get_pmu(); + struct pmu *pmu = x86_get_pmu(smp_processor_id()); union cpuid28_eax eax; union cpuid28_ebx ebx; union cpuid28_ecx ecx; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 6a8edfe59b09c..f07d77cffb3c6 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -326,6 +326,8 @@ struct cpu_hw_events { int n_pair; /* Large increment events */ void *kfree_on_online[X86_PERF_KFREE_MAX]; + + struct pmu *pmu; }; #define __EVENT_CONSTRAINT_RANGE(c, e, n, m, w, o, f) { \ @@ -897,7 +899,7 @@ static struct perf_pmu_events_ht_attr event_attr_##v = { \ .event_str_ht = ht, \ } -struct pmu *x86_get_pmu(void); +struct pmu *x86_get_pmu(unsigned int cpu); extern struct x86_pmu x86_pmu __read_mostly; static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx) @@ -1122,6 +1124,8 @@ void reserve_ds_buffers(void); void release_lbr_buffers(void); +void reserve_lbr_buffers(void); + extern struct event_constraint bts_constraint; extern struct event_constraint vlbr_constraint; @@ -1267,6 +1271,10 @@ static inline void release_lbr_buffers(void) { } +static inline void reserve_lbr_buffers(void) +{ +} + static inline int intel_pmu_init(void) { return 0; diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index fdee23ea4e173..16bf4d4a8159e 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -204,6 +204,14 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); } +static inline void fxsave(struct fxregs_state *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx)); + else + asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); +} + /* These macros all use (%edi)/(%rdi) as the single memory argument. */ #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" @@ -268,28 +276,6 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ -static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) -{ - u64 mask = xfeatures_mask_all; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - XSTATE_OP(XSAVES, xstate, lmask, hmask, err); - else - XSTATE_OP(XSAVE, xstate, lmask, hmask, err); - - /* We should never fault when copying to a kernel buffer: */ - WARN_ON_FPU(err); -} - /* * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. 
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index ec3ae30547920..b7b92cdf3add4 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -221,28 +221,18 @@ sanitize_restored_user_xstate(union fpregs_state *state, if (use_xsave()) { /* - * Note: we don't need to zero the reserved bits in the - * xstate_header here because we either didn't copy them at all, - * or we checked earlier that they aren't set. + * Clear all feature bits which are not set in + * user_xfeatures and clear all extended features + * for fx_only mode. */ + u64 mask = fx_only ? XFEATURE_MASK_FPSSE : user_xfeatures; /* - * 'user_xfeatures' might have bits clear which are - * set in header->xfeatures. This represents features that - * were in init state prior to a signal delivery, and need - * to be reset back to the init state. Clear any user - * feature bits which are set in the kernel buffer to get - * them back to the init state. - * - * Supervisor state is unchanged by input from userspace. - * Ensure supervisor state bits stay set and supervisor - * state is not modified. + * Supervisor state has to be preserved. The sigframe + * restore can only modify user features, i.e. @mask + * cannot contain them. */ - if (fx_only) - header->xfeatures = XFEATURE_MASK_FPSSE; - else - header->xfeatures &= user_xfeatures | - xfeatures_mask_supervisor(); + header->xfeatures &= mask | xfeatures_mask_supervisor(); } if (use_fxsr()) { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 67f1a03b9b235..80dcf0417f30b 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -440,6 +440,25 @@ static void __init print_xstate_offset_size(void) } } +/* + * All supported features have either init state all zeros or are + * handled in setup_init_fpu() individually. This is an explicit + * feature list and does not use XFEATURE_MASK*SUPPORTED to catch + * newly added supported features at build time and make people + * actually look at the init state for the new feature. + */ +#define XFEATURES_INIT_FPSTATE_HANDLED \ + (XFEATURE_MASK_FP | \ + XFEATURE_MASK_SSE | \ + XFEATURE_MASK_YMM | \ + XFEATURE_MASK_OPMASK | \ + XFEATURE_MASK_ZMM_Hi256 | \ + XFEATURE_MASK_Hi16_ZMM | \ + XFEATURE_MASK_PKRU | \ + XFEATURE_MASK_BNDREGS | \ + XFEATURE_MASK_BNDCSR | \ + XFEATURE_MASK_PASID) + /* * setup the xstate image representing the init state */ @@ -447,6 +466,10 @@ static void __init setup_init_fpu_buf(void) { static int on_boot_cpu __initdata = 1; + BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED | + XFEATURE_MASK_SUPERVISOR_SUPPORTED) != + XFEATURES_INIT_FPSTATE_HANDLED); + WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; @@ -466,10 +489,22 @@ static void __init setup_init_fpu_buf(void) copy_kernel_to_xregs_booting(&init_fpstate.xsave); /* - * Dump the init state again. This is to identify the init state - * of any feature which is not represented by all zero's. + * All components are now in init state. Read the state back so + * that init_fpstate contains all non-zero init state. This only + * works with XSAVE, but not with XSAVEOPT and XSAVES because + * those use the init optimization which skips writing data for + * components in init state. + * + * XSAVE could be used, but that would require to reshuffle the + * data when XSAVES is available because XSAVES uses xstate + * compaction. But doing so is a pointless exercise because most + * components have an all zeros init state except for the legacy + * ones (FP and SSE). 
Those can be saved with FXSAVE into the + * legacy area. Adding new features requires to ensure that init + * state is all zeroes or if not to add the necessary handling + * here. */ - copy_xregs_to_kernel_booting(&init_fpstate.xsave); + fxsave(&init_fpstate.fxsave); } static int xfeature_uncompacted_offset(int xfeature_nr) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 16b10b9436dc5..01547bdbfb061 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -130,9 +130,25 @@ static void sev_asid_free(int asid) mutex_unlock(&sev_bitmap_lock); } -static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) +static void sev_decommission(unsigned int handle) { struct sev_data_decommission *decommission; + + if (!handle) + return; + + decommission = kzalloc(sizeof(*decommission), GFP_KERNEL); + if (!decommission) + return; + + decommission->handle = handle; + sev_guest_decommission(decommission, NULL); + + kfree(decommission); +} + +static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) +{ struct sev_data_deactivate *data; if (!handle) @@ -152,15 +168,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) kfree(data); - decommission = kzalloc(sizeof(*decommission), GFP_KERNEL); - if (!decommission) - return; - - /* decommission handle */ - decommission->handle = handle; - sev_guest_decommission(decommission, NULL); - - kfree(decommission); + sev_decommission(handle); } static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) @@ -288,8 +296,10 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) /* Bind ASID to this guest */ ret = sev_bind_asid(kvm, start->handle, error); - if (ret) + if (ret) { + sev_decommission(start->handle); goto e_free_session; + } /* return handle to userspace */ params.handle = start->handle; diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 0a0e168be1cbe..9b0e771302cee 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -779,4 +779,48 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar); DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar); DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar); +#define RS690_LOWER_TOP_OF_DRAM2 0x30 +#define RS690_LOWER_TOP_OF_DRAM2_VALID 0x1 +#define RS690_UPPER_TOP_OF_DRAM2 0x31 +#define RS690_HTIU_NB_INDEX 0xA8 +#define RS690_HTIU_NB_INDEX_WR_ENABLE 0x100 +#define RS690_HTIU_NB_DATA 0xAC + +/* + * Some BIOS implementations support RAM above 4GB, but do not configure the + * PCI host to respond to bus master accesses for these addresses. These + * implementations set the TOP_OF_DRAM_SLOT1 register correctly, so PCI DMA + * works as expected for addresses below 4GB. + * + * Reference: "AMD RS690 ASIC Family Register Reference Guide" (pg. 
2-57) + * https://www.amd.com/system/files/TechDocs/43372_rs690_rrg_3.00o.pdf + */ +static void rs690_fix_64bit_dma(struct pci_dev *pdev) +{ + u32 val = 0; + phys_addr_t top_of_dram = __pa(high_memory - 1) + 1; + + if (top_of_dram <= (1ULL << 32)) + return; + + pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, + RS690_LOWER_TOP_OF_DRAM2); + pci_read_config_dword(pdev, RS690_HTIU_NB_DATA, &val); + + if (val) + return; + + pci_info(pdev, "Adjusting top of DRAM to %pa for 64-bit DMA support\n", &top_of_dram); + + pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, + RS690_UPPER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE); + pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, top_of_dram >> 32); + + pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, + RS690_LOWER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE); + pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, + top_of_dram | RS690_LOWER_TOP_OF_DRAM2_VALID); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma); + #endif diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 8064df6382227..d3cdf467d91fa 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -586,8 +586,10 @@ DEFINE_IDTENTRY_RAW(xenpv_exc_debug) DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap) { /* This should never happen and there is no way to handle it. */ + instrumentation_begin(); pr_err("Unknown trap in Xen PV mode."); BUG(); + instrumentation_end(); } struct trap_array_entry { diff --git a/certs/Kconfig b/certs/Kconfig index c94e93d8bccf0..ab88d2a7f3c7f 100644 --- a/certs/Kconfig +++ b/certs/Kconfig @@ -83,4 +83,21 @@ config SYSTEM_BLACKLIST_HASH_LIST wrapper to incorporate the list into the kernel. Each should be a string of hex digits. +config SYSTEM_REVOCATION_LIST + bool "Provide system-wide ring of revocation certificates" + depends on SYSTEM_BLACKLIST_KEYRING + depends on PKCS7_MESSAGE_PARSER=y + help + If set, this allows revocation certificates to be stored in the + blacklist keyring and implements a hook whereby a PKCS#7 message can + be checked to see if it matches such a certificate. + +config SYSTEM_REVOCATION_KEYS + string "X.509 certificates to be preloaded into the system blacklist keyring" + depends on SYSTEM_REVOCATION_LIST + help + If set, this option should be the filename of a PEM-formatted file + containing X.509 certificates to be included in the default blacklist + keyring. + endmenu diff --git a/certs/Makefile b/certs/Makefile index f4c25b67aad90..b6db52ebf0beb 100644 --- a/certs/Makefile +++ b/certs/Makefile @@ -3,8 +3,9 @@ # Makefile for the linux kernel signature checking certificates. 
# -obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o -obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o +obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o common.o +obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o common.o +obj-$(CONFIG_SYSTEM_REVOCATION_LIST) += revocation_certificates.o ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),"") obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_hashes.o else @@ -29,7 +30,7 @@ $(obj)/x509_certificate_list: scripts/extract-cert $(SYSTEM_TRUSTED_KEYS_SRCPREF $(call if_changed,extract_certs,$(SYSTEM_TRUSTED_KEYS_SRCPREFIX)$(CONFIG_SYSTEM_TRUSTED_KEYS)) endif # CONFIG_SYSTEM_TRUSTED_KEYRING -clean-files := x509_certificate_list .x509.list +clean-files := x509_certificate_list .x509.list x509_revocation_list ifeq ($(CONFIG_MODULE_SIG),y) ############################################################################### @@ -104,3 +105,17 @@ targets += signing_key.x509 $(obj)/signing_key.x509: scripts/extract-cert $(X509_DEP) FORCE $(call if_changed,extract_certs,$(MODULE_SIG_KEY_SRCPREFIX)$(CONFIG_MODULE_SIG_KEY)) endif # CONFIG_MODULE_SIG + +ifeq ($(CONFIG_SYSTEM_REVOCATION_LIST),y) + +$(eval $(call config_filename,SYSTEM_REVOCATION_KEYS)) + +$(obj)/revocation_certificates.o: $(obj)/x509_revocation_list + +quiet_cmd_extract_certs = EXTRACT_CERTS $(patsubst "%",%,$(2)) + cmd_extract_certs = scripts/extract-cert $(2) $@ + +targets += x509_revocation_list +$(obj)/x509_revocation_list: scripts/extract-cert $(SYSTEM_REVOCATION_KEYS_SRCPREFIX)$(SYSTEM_REVOCATION_KEYS_FILENAME) FORCE + $(call if_changed,extract_certs,$(SYSTEM_REVOCATION_KEYS_SRCPREFIX)$(CONFIG_SYSTEM_REVOCATION_KEYS)) +endif diff --git a/certs/blacklist.c b/certs/blacklist.c index f1c434b04b5e4..c973de883cf02 100644 --- a/certs/blacklist.c +++ b/certs/blacklist.c @@ -16,9 +16,15 @@ #include #include #include "blacklist.h" +#include "common.h" static struct key *blacklist_keyring; +#ifdef CONFIG_SYSTEM_REVOCATION_LIST +extern __initconst const u8 revocation_certificate_list[]; +extern __initconst const unsigned long revocation_certificate_list_size; +#endif + /* * The description must be a type prefix, a colon and then an even number of * hex digits. The hash is kept in the description. @@ -144,6 +150,49 @@ int is_binary_blacklisted(const u8 *hash, size_t hash_len) } EXPORT_SYMBOL_GPL(is_binary_blacklisted); +#ifdef CONFIG_SYSTEM_REVOCATION_LIST +/** + * add_key_to_revocation_list - Add a revocation certificate to the blacklist + * @data: The data blob containing the certificate + * @size: The size of data blob + */ +int add_key_to_revocation_list(const char *data, size_t size) +{ + key_ref_t key; + + key = key_create_or_update(make_key_ref(blacklist_keyring, true), + "asymmetric", + NULL, + data, + size, + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW), + KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_BUILT_IN); + + if (IS_ERR(key)) { + pr_err("Problem with revocation key (%ld)\n", PTR_ERR(key)); + return PTR_ERR(key); + } + + return 0; +} + +/** + * is_key_on_revocation_list - Determine if the key for a PKCS#7 message is revoked + * @pkcs7: The PKCS#7 message to check + */ +int is_key_on_revocation_list(struct pkcs7_message *pkcs7) +{ + int ret; + + ret = pkcs7_validate_trust(pkcs7, blacklist_keyring); + + if (ret == 0) + return -EKEYREJECTED; + + return -ENOKEY; +} +#endif + /* * Initialise the blacklist */ @@ -177,3 +226,18 @@ static int __init blacklist_init(void) * Must be initialised before we try and load the keys into the keyring. 
*/ device_initcall(blacklist_init); + +#ifdef CONFIG_SYSTEM_REVOCATION_LIST +/* + * Load the compiled-in list of revocation X.509 certificates. + */ +static __init int load_revocation_certificate_list(void) +{ + if (revocation_certificate_list_size) + pr_notice("Loading compiled-in revocation X.509 certificates\n"); + + return load_certificate_list(revocation_certificate_list, revocation_certificate_list_size, + blacklist_keyring); +} +late_initcall(load_revocation_certificate_list); +#endif diff --git a/certs/blacklist.h b/certs/blacklist.h index 1efd6fa0dc608..51b320cf85749 100644 --- a/certs/blacklist.h +++ b/certs/blacklist.h @@ -1,3 +1,5 @@ #include <linux/kernel.h> +#include <linux/errno.h> +#include <crypto/pkcs7.h> extern const char __initconst *const blacklist_hashes[]; diff --git a/certs/common.c b/certs/common.c new file mode 100644 index 0000000000000..16a220887a53e --- /dev/null +++ b/certs/common.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/kernel.h> +#include <linux/key.h> +#include "common.h" + +int load_certificate_list(const u8 cert_list[], + const unsigned long list_size, + const struct key *keyring) +{ + key_ref_t key; + const u8 *p, *end; + size_t plen; + + p = cert_list; + end = p + list_size; + while (p < end) { + /* Each cert begins with an ASN.1 SEQUENCE tag and must be more + * than 256 bytes in size. + */ + if (end - p < 4) + goto dodgy_cert; + if (p[0] != 0x30 && + p[1] != 0x82) + goto dodgy_cert; + plen = (p[2] << 8) | p[3]; + plen += 4; + if (plen > end - p) + goto dodgy_cert; + + key = key_create_or_update(make_key_ref(keyring, 1), + "asymmetric", + NULL, + p, + plen, + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ), + KEY_ALLOC_NOT_IN_QUOTA | + KEY_ALLOC_BUILT_IN | + KEY_ALLOC_BYPASS_RESTRICTION); + if (IS_ERR(key)) { + pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", + PTR_ERR(key)); + } else { + pr_notice("Loaded X.509 cert '%s'\n", + key_ref_to_ptr(key)->description); + key_ref_put(key); + } + p += plen; + } + + return 0; + +dodgy_cert: + pr_err("Problem parsing in-kernel X.509 certificate list\n"); + return 0; +} diff --git a/certs/common.h b/certs/common.h new file mode 100644 index 0000000000000..abdb5795936b7 --- /dev/null +++ b/certs/common.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _CERT_COMMON_H +#define _CERT_COMMON_H + +int load_certificate_list(const u8 cert_list[], const unsigned long list_size, + const struct key *keyring); + +#endif diff --git a/certs/revocation_certificates.S b/certs/revocation_certificates.S new file mode 100644 index 0000000000000..f21aae8a8f0ef --- /dev/null +++ b/certs/revocation_certificates.S @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> +#include <linux/init.h> + + __INITRODATA + + .align 8 + .globl revocation_certificate_list +revocation_certificate_list: +__revocation_list_start: + .incbin "certs/x509_revocation_list" +__revocation_list_end: + + .align 8 + .globl revocation_certificate_list_size +revocation_certificate_list_size: +#ifdef CONFIG_64BIT + .quad __revocation_list_end - __revocation_list_start +#else + .long __revocation_list_end - __revocation_list_start +#endif diff --git a/certs/system_keyring.c b/certs/system_keyring.c index 798291177186c..a44a8915c94cf 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -15,6 +15,7 @@ #include <keys/asymmetric-type.h> #include <keys/system_keyring.h> #include <crypto/pkcs7.h> +#include "common.h" static struct key *builtin_trusted_keys; #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING @@ -136,54 +137,10 @@ device_initcall(system_trusted_keyring_init); */ static __init int
load_system_certificate_list(void) { - key_ref_t key; - const u8 *p, *end; - size_t plen; - pr_notice("Loading compiled-in X.509 certificates\n"); - p = system_certificate_list; - end = p + system_certificate_list_size; - while (p < end) { - /* Each cert begins with an ASN.1 SEQUENCE tag and must be more - * than 256 bytes in size. - */ - if (end - p < 4) - goto dodgy_cert; - if (p[0] != 0x30 && - p[1] != 0x82) - goto dodgy_cert; - plen = (p[2] << 8) | p[3]; - plen += 4; - if (plen > end - p) - goto dodgy_cert; - - key = key_create_or_update(make_key_ref(builtin_trusted_keys, 1), - "asymmetric", - NULL, - p, - plen, - ((KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ), - KEY_ALLOC_NOT_IN_QUOTA | - KEY_ALLOC_BUILT_IN | - KEY_ALLOC_BYPASS_RESTRICTION); - if (IS_ERR(key)) { - pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", - PTR_ERR(key)); - } else { - pr_notice("Loaded X.509 cert '%s'\n", - key_ref_to_ptr(key)->description); - key_ref_put(key); - } - p += plen; - } - - return 0; - -dodgy_cert: - pr_err("Problem parsing in-kernel X.509 certificate list\n"); - return 0; + return load_certificate_list(system_certificate_list, system_certificate_list_size, + builtin_trusted_keys); } late_initcall(load_system_certificate_list); @@ -241,6 +198,12 @@ int verify_pkcs7_message_sig(const void *data, size_t len, pr_devel("PKCS#7 platform keyring is not available\n"); goto error; } + + ret = is_key_on_revocation_list(pkcs7); + if (ret != -ENOKEY) { + pr_devel("PKCS#7 platform key is on revocation list\n"); + goto error; + } } ret = pkcs7_validate_trust(pkcs7, trusted_keys); if (ret < 0) { diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index f2db761ee5488..f28bb2334e747 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -693,6 +693,7 @@ config XILINX_ZYNQMP_DMA config XILINX_ZYNQMP_DPDMA tristate "Xilinx DPDMA Engine" + depends on HAS_IOMEM && OF select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c index 27c07350971dd..375e7e647df6b 100644 --- a/drivers/dma/mediatek/mtk-uart-apdma.c +++ b/drivers/dma/mediatek/mtk-uart-apdma.c @@ -131,10 +131,7 @@ static unsigned int mtk_uart_apdma_read(struct mtk_chan *c, unsigned int reg) static void mtk_uart_apdma_desc_free(struct virt_dma_desc *vd) { - struct dma_chan *chan = vd->tx.chan; - struct mtk_chan *c = to_mtk_uart_apdma_chan(chan); - - kfree(c->desc); + kfree(container_of(vd, struct mtk_uart_apdma_desc, vd)); } static void mtk_uart_apdma_start_tx(struct mtk_chan *c) @@ -207,14 +204,9 @@ static void mtk_uart_apdma_start_rx(struct mtk_chan *c) static void mtk_uart_apdma_tx_handler(struct mtk_chan *c) { - struct mtk_uart_apdma_desc *d = c->desc; - mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_TX_INT_CLR_B); mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B); mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B); - - list_del(&d->vd.node); - vchan_cookie_complete(&d->vd); } static void mtk_uart_apdma_rx_handler(struct mtk_chan *c) @@ -245,9 +237,17 @@ static void mtk_uart_apdma_rx_handler(struct mtk_chan *c) c->rx_status = d->avail_len - cnt; mtk_uart_apdma_write(c, VFF_RPT, wg); +} - list_del(&d->vd.node); - vchan_cookie_complete(&d->vd); +static void mtk_uart_apdma_chan_complete_handler(struct mtk_chan *c) +{ + struct mtk_uart_apdma_desc *d = c->desc; + + if (d) { + list_del(&d->vd.node); + vchan_cookie_complete(&d->vd); + c->desc = NULL; + } } static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id) @@ -261,6 +261,7 @@ 
static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id) mtk_uart_apdma_rx_handler(c); else if (c->dir == DMA_MEM_TO_DEV) mtk_uart_apdma_tx_handler(c); + mtk_uart_apdma_chan_complete_handler(c); spin_unlock_irqrestore(&c->vc.lock, flags); return IRQ_HANDLED; @@ -348,7 +349,7 @@ static struct dma_async_tx_descriptor *mtk_uart_apdma_prep_slave_sg return NULL; /* Now allocate and setup the descriptor */ - d = kzalloc(sizeof(*d), GFP_ATOMIC); + d = kzalloc(sizeof(*d), GFP_NOWAIT); if (!d) return NULL; @@ -366,7 +367,7 @@ static void mtk_uart_apdma_issue_pending(struct dma_chan *chan) unsigned long flags; spin_lock_irqsave(&c->vc.lock, flags); - if (vchan_issue_pending(&c->vc)) { + if (vchan_issue_pending(&c->vc) && !c->desc) { vd = vchan_next_desc(&c->vc); c->desc = to_mtk_uart_apdma_desc(&vd->tx); diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index a57705356e8bb..991a7b5da29f0 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1874,7 +1874,7 @@ static int rcar_dmac_probe(struct platform_device *pdev) /* Enable runtime PM and initialize the device. */ pm_runtime_enable(&pdev->dev); - ret = pm_runtime_get_sync(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) { dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret); return ret; diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index 08cfbfab837bb..9d473923712ad 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -1448,7 +1448,7 @@ static int stm32_mdma_alloc_chan_resources(struct dma_chan *c) return -ENOMEM; } - ret = pm_runtime_get_sync(dmadev->ddev.dev); + ret = pm_runtime_resume_and_get(dmadev->ddev.dev); if (ret < 0) return ret; @@ -1714,7 +1714,7 @@ static int stm32_mdma_pm_suspend(struct device *dev) u32 ccr, id; int ret; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c index ff7dfb3fdeb47..6c709803203ad 100644 --- a/drivers/dma/xilinx/xilinx_dpdma.c +++ b/drivers/dma/xilinx/xilinx_dpdma.c @@ -113,6 +113,7 @@ #define XILINX_DPDMA_CH_VDO 0x020 #define XILINX_DPDMA_CH_PYLD_SZ 0x024 #define XILINX_DPDMA_CH_DESC_ID 0x028 +#define XILINX_DPDMA_CH_DESC_ID_MASK GENMASK(15, 0) /* DPDMA descriptor fields */ #define XILINX_DPDMA_DESC_CONTROL_PREEMBLE 0xa5 @@ -866,7 +867,8 @@ static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan) * will be used, but it should be enough. */ list_for_each_entry(sw_desc, &desc->descriptors, node) - sw_desc->hw.desc_id = desc->vdesc.tx.cookie; + sw_desc->hw.desc_id = desc->vdesc.tx.cookie + & XILINX_DPDMA_CH_DESC_ID_MASK; sw_desc = list_first_entry(&desc->descriptors, struct xilinx_dpdma_sw_desc, node); @@ -1086,7 +1088,8 @@ static void xilinx_dpdma_chan_vsync_irq(struct xilinx_dpdma_chan *chan) if (!chan->running || !pending) goto out; - desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID); + desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID) + & XILINX_DPDMA_CH_DESC_ID_MASK; /* If the retrigger raced with vsync, retry at the next frame. 
*/ sw_desc = list_first_entry(&pending->descriptors, diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c index d8419565b92cc..5fecf5aa6e858 100644 --- a/drivers/dma/xilinx/zynqmp_dma.c +++ b/drivers/dma/xilinx/zynqmp_dma.c @@ -468,7 +468,7 @@ static int zynqmp_dma_alloc_chan_resources(struct dma_chan *dchan) struct zynqmp_dma_desc_sw *desc; int i, ret; - ret = pm_runtime_get_sync(chan->dev); + ret = pm_runtime_resume_and_get(chan->dev); if (ret < 0) return ret; diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index ade3ecf2ee495..2613881a66e66 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -1865,6 +1865,7 @@ static void gpio_v2_line_info_changed_to_v1( struct gpio_v2_line_info_changed *lic_v2, struct gpioline_info_changed *lic_v1) { + memset(lic_v1, 0, sizeof(*lic_v1)); gpio_v2_line_info_to_v1(&lic_v2->info, &lic_v1->info); lic_v1->timestamp = lic_v2->timestamp_ns; lic_v1->event_type = lic_v2->event_type; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 1b56dbc1f304e..e93ccdc5faf4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -238,9 +238,21 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) { struct drm_gem_object *obj = attach->dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + int r; /* pin buffer into GTT */ - return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + if (r) + return r; + + if (bo->tbo.moving) { + r = dma_fence_wait(bo->tbo.moving, true); + if (r) { + amdgpu_bo_unpin(bo); + return r; + } + } + return 0; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 3c92dacbc24ad..fc8da5fed779b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6590,12 +6590,8 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring) if (ring->use_doorbell) { WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, (adev->doorbell_index.kiq * 2) << 2); - /* If GC has entered CGPG, ringing doorbell > first page doesn't - * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround - * this issue. - */ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, - (adev->doorbell.size - 4)); + (adev->doorbell_index.userqueue_end * 2) << 2); } WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 1859d293ef712..fb15e8b5af32f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3619,12 +3619,8 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) if (ring->use_doorbell) { WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, (adev->doorbell_index.kiq * 2) << 2); - /* If GC has entered CGPG, ringing doorbell > first page doesn't - * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround - * this issue. 
- */ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, - (adev->doorbell.size - 4)); + (adev->doorbell_index.userqueue_end * 2) << 2); } WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c index b2ecb91f8ddc0..5f5b87f995468 100644 --- a/drivers/gpu/drm/nouveau/nouveau_prime.c +++ b/drivers/gpu/drm/nouveau/nouveau_prime.c @@ -111,7 +111,22 @@ int nouveau_gem_prime_pin(struct drm_gem_object *obj) if (ret) return -EINVAL; - return 0; + ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL); + if (ret) + goto error; + + if (nvbo->bo.moving) + ret = dma_fence_wait(nvbo->bo.moving, true); + + ttm_bo_unreserve(&nvbo->bo); + if (ret) + goto error; + + return ret; + +error: + nouveau_bo_unpin(nvbo); + return ret; } void nouveau_gem_prime_unpin(struct drm_gem_object *obj) diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c index b9de0e51c0be9..cbad81578190f 100644 --- a/drivers/gpu/drm/radeon/radeon_prime.c +++ b/drivers/gpu/drm/radeon/radeon_prime.c @@ -94,9 +94,19 @@ int radeon_gem_prime_pin(struct drm_gem_object *obj) /* pin buffer into GTT */ ret = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_GTT, NULL); - if (likely(ret == 0)) - bo->prime_shared_count++; - + if (unlikely(ret)) + goto error; + + if (bo->tbo.moving) { + ret = dma_fence_wait(bo->tbo.moving, false); + if (unlikely(ret)) { + radeon_bo_unpin(bo); + goto error; + } + } + + bo->prime_shared_count++; +error: radeon_bo_unreserve(bo); return ret; } diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index af5f01eff872c..88a8cb840cd54 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -146,6 +146,8 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force) struct vc4_hdmi *vc4_hdmi = connector_to_vc4_hdmi(connector); bool connected = false; + WARN_ON(pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev)); + if (vc4_hdmi->hpd_gpio) { if (gpio_get_value_cansleep(vc4_hdmi->hpd_gpio) ^ vc4_hdmi->hpd_active_low) @@ -167,10 +169,12 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force) } } + pm_runtime_put(&vc4_hdmi->pdev->dev); return connector_status_connected; } cec_phys_addr_invalidate(vc4_hdmi->cec_adap); + pm_runtime_put(&vc4_hdmi->pdev->dev); return connector_status_disconnected; } @@ -415,7 +419,6 @@ static void vc4_hdmi_encoder_post_crtc_powerdown(struct drm_encoder *encoder) HDMI_READ(HDMI_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE); clk_disable_unprepare(vc4_hdmi->pixel_bvb_clock); - clk_disable_unprepare(vc4_hdmi->hsm_clock); clk_disable_unprepare(vc4_hdmi->pixel_clock); ret = pm_runtime_put(&vc4_hdmi->pdev->dev); @@ -666,13 +669,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder) return; } - ret = clk_prepare_enable(vc4_hdmi->hsm_clock); - if (ret) { - DRM_ERROR("Failed to turn on HSM clock: %d\n", ret); - clk_disable_unprepare(vc4_hdmi->pixel_clock); - return; - } - vc4_hdmi_cec_update_clk_div(vc4_hdmi); /* @@ -683,7 +679,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder) (hsm_rate > VC4_HSM_MID_CLOCK ? 
150000000 : 75000000)); if (ret) { DRM_ERROR("Failed to set pixel bvb clock rate: %d\n", ret); - clk_disable_unprepare(vc4_hdmi->hsm_clock); clk_disable_unprepare(vc4_hdmi->pixel_clock); return; } @@ -691,7 +686,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder) ret = clk_prepare_enable(vc4_hdmi->pixel_bvb_clock); if (ret) { DRM_ERROR("Failed to turn on pixel bvb clock: %d\n", ret); - clk_disable_unprepare(vc4_hdmi->hsm_clock); clk_disable_unprepare(vc4_hdmi->pixel_clock); return; } @@ -1724,6 +1718,29 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi) return 0; } +#ifdef CONFIG_PM +static int vc4_hdmi_runtime_suspend(struct device *dev) +{ + struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); + + clk_disable_unprepare(vc4_hdmi->hsm_clock); + + return 0; +} + +static int vc4_hdmi_runtime_resume(struct device *dev) +{ + struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); + int ret; + + ret = clk_prepare_enable(vc4_hdmi->hsm_clock); + if (ret) + return ret; + + return 0; +} +#endif + static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) { const struct vc4_hdmi_variant *variant = of_device_get_match_data(dev); @@ -1959,11 +1976,18 @@ static const struct of_device_id vc4_hdmi_dt_match[] = { {} }; +static const struct dev_pm_ops vc4_hdmi_pm_ops = { + SET_RUNTIME_PM_OPS(vc4_hdmi_runtime_suspend, + vc4_hdmi_runtime_resume, + NULL) +}; + struct platform_driver vc4_hdmi_driver = { .probe = vc4_hdmi_dev_probe, .remove = vc4_hdmi_dev_remove, .driver = { .name = "vc4_hdmi", .of_match_table = vc4_hdmi_dt_match, + .pm = &vc4_hdmi_pm_ops, }, }; diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index e42b87e96f747..eab6fd6b890eb 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -974,6 +974,9 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr, } out: + /* Unlock the SMBus device for use by BIOS/ACPI */ + outb_p(SMBHSTSTS_INUSE_STS, SMBHSTSTS(priv)); + pm_runtime_mark_last_busy(&priv->pci_dev->dev); pm_runtime_put_autosuspend(&priv->pci_dev->dev); mutex_unlock(&priv->acpi_lock); diff --git a/drivers/i2c/busses/i2c-robotfuzz-osif.c b/drivers/i2c/busses/i2c-robotfuzz-osif.c index a39f7d0927973..66dfa211e736b 100644 --- a/drivers/i2c/busses/i2c-robotfuzz-osif.c +++ b/drivers/i2c/busses/i2c-robotfuzz-osif.c @@ -83,7 +83,7 @@ static int osif_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, } } - ret = osif_usb_read(adapter, OSIFI2C_STOP, 0, 0, NULL, 0); + ret = osif_usb_write(adapter, OSIFI2C_STOP, 0, 0, NULL, 0); if (ret) { dev_err(&adapter->dev, "failure sending STOP\n"); return -EREMOTEIO; @@ -153,7 +153,7 @@ static int osif_probe(struct usb_interface *interface, * Set bus frequency. The frequency is: * 120,000,000 / ( 16 + 2 * div * 4^prescale). 
* Using dev = 52, prescale = 0 give 100KHz */ - ret = osif_usb_read(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0, + ret = osif_usb_write(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0, NULL, 0); if (ret) { dev_err(&interface->dev, "failure sending bit rate"); diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index 4ec41579940a3..d3f40c9a8c6c8 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -165,6 +165,7 @@ struct meson_host { unsigned int bounce_buf_size; void *bounce_buf; + void __iomem *bounce_iomem_buf; dma_addr_t bounce_dma_addr; struct sd_emmc_desc *descs; dma_addr_t descs_dma_addr; @@ -734,6 +735,47 @@ static void meson_mmc_desc_chain_transfer(struct mmc_host *mmc, u32 cmd_cfg) writel(start, host->regs + SD_EMMC_START); } +/* local sg copy to buffer version with _to/fromio usage for dram_access_quirk */ +static void meson_mmc_copy_buffer(struct meson_host *host, struct mmc_data *data, + size_t buflen, bool to_buffer) +{ + unsigned int sg_flags = SG_MITER_ATOMIC; + struct scatterlist *sgl = data->sg; + unsigned int nents = data->sg_len; + struct sg_mapping_iter miter; + unsigned int offset = 0; + + if (to_buffer) + sg_flags |= SG_MITER_FROM_SG; + else + sg_flags |= SG_MITER_TO_SG; + + sg_miter_start(&miter, sgl, nents, sg_flags); + + while ((offset < buflen) && sg_miter_next(&miter)) { + unsigned int len; + + len = min(miter.length, buflen - offset); + + /* When dram_access_quirk, the bounce buffer is a iomem mapping */ + if (host->dram_access_quirk) { + if (to_buffer) + memcpy_toio(host->bounce_iomem_buf + offset, miter.addr, len); + else + memcpy_fromio(miter.addr, host->bounce_iomem_buf + offset, len); + } else { + if (to_buffer) + memcpy(host->bounce_buf + offset, miter.addr, len); + else + memcpy(miter.addr, host->bounce_buf + offset, len); + } + + offset += len; + } + + sg_miter_stop(&miter); +} + static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) { struct meson_host *host = mmc_priv(mmc); @@ -777,8 +819,7 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) if (data->flags & MMC_DATA_WRITE) { cmd_cfg |= CMD_CFG_DATA_WR; WARN_ON(xfer_bytes > host->bounce_buf_size); - sg_copy_to_buffer(data->sg, data->sg_len, - host->bounce_buf, xfer_bytes); + meson_mmc_copy_buffer(host, data, xfer_bytes, true); dma_wmb(); } @@ -947,8 +988,7 @@ static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id) if (meson_mmc_bounce_buf_read(data)) { xfer_bytes = data->blksz * data->blocks; WARN_ON(xfer_bytes > host->bounce_buf_size); - sg_copy_from_buffer(data->sg, data->sg_len, - host->bounce_buf, xfer_bytes); + meson_mmc_copy_buffer(host, data, xfer_bytes, false); } next_cmd = meson_mmc_get_next_command(cmd); @@ -1168,7 +1208,7 @@ static int meson_mmc_probe(struct platform_device *pdev) * instead of the DDR memory */ host->bounce_buf_size = SD_EMMC_SRAM_DATA_BUF_LEN; - host->bounce_buf = host->regs + SD_EMMC_SRAM_DATA_BUF_OFF; + host->bounce_iomem_buf = host->regs + SD_EMMC_SRAM_DATA_BUF_OFF; host->bounce_dma_addr = res->start + SD_EMMC_SRAM_DATA_BUF_OFF; } else { /* data bounce buffer */ diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index d025ea4349339..39fbd0be179c2 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -351,6 +351,7 @@ static int ldisc_open(struct tty_struct *tty) rtnl_lock(); result = register_netdevice(dev); if (result) { + tty_kref_put(tty); rtnl_unlock(); free_netdev(dev); return -ENODEV; diff --git 
a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index 17d5b649eb36b..e81dd34a3cac2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -1266,9 +1266,11 @@ int qed_dcbx_get_config_params(struct qed_hwfn *p_hwfn, p_hwfn->p_dcbx_info->set.ver_num |= DCBX_CONFIG_VERSION_STATIC; p_hwfn->p_dcbx_info->set.enabled = dcbx_info->operational.enabled; + BUILD_BUG_ON(sizeof(dcbx_info->operational.params) != + sizeof(p_hwfn->p_dcbx_info->set.config.params)); memcpy(&p_hwfn->p_dcbx_info->set.config.params, &dcbx_info->operational.params, - sizeof(struct qed_dcbx_admin_params)); + sizeof(p_hwfn->p_dcbx_info->set.config.params)); p_hwfn->p_dcbx_info->set.config.valid = true; memcpy(params, &p_hwfn->p_dcbx_info->set, sizeof(struct qed_dcbx_set)); diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 3bb36f4a984e8..a6bf80b529679 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1673,7 +1673,7 @@ static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data) { switch(stringset) { case ETH_SS_STATS: - memcpy(data, *rtl8169_gstrings, sizeof(rtl8169_gstrings)); + memcpy(data, rtl8169_gstrings, sizeof(rtl8169_gstrings)); break; } } diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 6d84266c03caf..5cab2d3c00236 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2287,7 +2287,7 @@ static void sh_eth_get_strings(struct net_device *ndev, u32 stringset, u8 *data) { switch (stringset) { case ETH_SS_STATS: - memcpy(data, *sh_eth_gstrings_stats, + memcpy(data, sh_eth_gstrings_stats, sizeof(sh_eth_gstrings_stats)); break; } diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 01bb36e7cff0a..6bd3a389d389c 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -774,12 +774,15 @@ static void temac_start_xmit_done(struct net_device *ndev) stat = be32_to_cpu(cur_p->app0); while (stat & STS_CTRL_APP0_CMPLT) { + /* Make sure that the other fields are read after bd is + * released by dma + */ + rmb(); dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys), be32_to_cpu(cur_p->len), DMA_TO_DEVICE); skb = (struct sk_buff *)ptr_from_txbd(cur_p); if (skb) dev_consume_skb_irq(skb); - cur_p->app0 = 0; cur_p->app1 = 0; cur_p->app2 = 0; cur_p->app3 = 0; @@ -788,6 +791,12 @@ static void temac_start_xmit_done(struct net_device *ndev) ndev->stats.tx_packets++; ndev->stats.tx_bytes += be32_to_cpu(cur_p->len); + /* app0 must be visible last, as it is used to flag + * availability of the bd + */ + smp_mb(); + cur_p->app0 = 0; + lp->tx_bd_ci++; if (lp->tx_bd_ci >= lp->tx_bd_num) lp->tx_bd_ci = 0; @@ -814,6 +823,9 @@ static inline int temac_check_tx_bd_space(struct temac_local *lp, int num_frag) if (cur_p->app0) return NETDEV_TX_BUSY; + /* Make sure to read next bd app0 after this one */ + rmb(); + tail++; if (tail >= lp->tx_bd_num) tail = 0; @@ -930,6 +942,11 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) wmb(); lp->dma_out(lp, TX_TAILDESC_PTR, tail_p); /* DMA start */ + if (temac_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) { + netdev_info(ndev, "%s -> netif_stop_queue\n", __func__); + netif_stop_queue(ndev); + } + return NETDEV_TX_OK; } diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c 
index 69d3eacc2b96c..c716074fdef0b 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -792,16 +792,12 @@ static int dp83867_phy_reset(struct phy_device *phydev) { int err; - err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESET); + err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESTART); if (err < 0) return err; usleep_range(10, 20); - /* After reset FORCE_LINK_GOOD bit is set. Although the - * default value should be unset. Disable FORCE_LINK_GOOD - * for the phy to work properly. - */ return phy_modify(phydev, MII_DP83867_PHYCTRL, DP83867_PHYCR_FORCE_LINK_GOOD, 0); } diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index f5010f8ac1ec7..95e27fb7d2c10 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -6054,7 +6054,7 @@ static void rtl8152_get_strings(struct net_device *dev, u32 stringset, u8 *data) { switch (stringset) { case ETH_SS_STATS: - memcpy(data, *rtl8152_gstrings, sizeof(rtl8152_gstrings)); + memcpy(data, rtl8152_gstrings, sizeof(rtl8152_gstrings)); break; } } diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 3b3fc7c9c91dc..f147d4feedb91 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1623,8 +1623,13 @@ static int mac80211_hwsim_start(struct ieee80211_hw *hw) static void mac80211_hwsim_stop(struct ieee80211_hw *hw) { struct mac80211_hwsim_data *data = hw->priv; + data->started = false; hrtimer_cancel(&data->beacon_timer); + + while (!skb_queue_empty(&data->pending)) + ieee80211_free_txskb(hw, skb_dequeue(&data->pending)); + wiphy_dbg(hw->wiphy, "%s\n", __func__); } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d5d9ea864fe66..9e971fffeb6a3 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1874,11 +1874,21 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags) int err; int i, bars = 0; - if (atomic_inc_return(&dev->enable_cnt) > 1) { - pci_update_current_state(dev, dev->current_state); - return 0; /* already enabled */ + /* + * Power state could be unknown at this point, either due to a fresh + * boot or a device removal call. So get the current power state + * so that things like MSI message writing will behave as expected + * (e.g. if the device really is in D0 at enable time). 
+ */ + if (dev->pm_cap) { + u16 pmcsr; + pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); + dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK); } + if (atomic_inc_return(&dev->enable_cnt) > 1) + return 0; /* already enabled */ + bridge = pci_upstream_bridge(dev); if (bridge) pci_enable_bridge(bridge); diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index 7d9bdedcd71bb..3af4430543dca 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -1229,7 +1229,7 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, struct device *dev = pctl->dev; struct resource res; int npins = STM32_GPIO_PINS_PER_BANK; - int bank_nr, err; + int bank_nr, err, i = 0; if (!IS_ERR(bank->rstc)) reset_control_deassert(bank->rstc); @@ -1251,9 +1251,14 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, of_property_read_string(np, "st,bank-name", &bank->gpio_chip.label); - if (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, &args)) { + if (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, i, &args)) { bank_nr = args.args[1] / STM32_GPIO_PINS_PER_BANK; bank->gpio_chip.base = args.args[1]; + + npins = args.args[2]; + while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, + ++i, &args)) + npins += args.args[2]; } else { bank_nr = pctl->nbanks; bank->gpio_chip.base = bank_nr * STM32_GPIO_PINS_PER_BANK; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 20a6564f87d9f..01f87bcab3dd1 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1389,6 +1389,22 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt) } } +static bool sd_need_revalidate(struct block_device *bdev, + struct scsi_disk *sdkp) +{ + if (sdkp->device->removable || sdkp->write_prot) { + if (bdev_check_media_change(bdev)) + return true; + } + + /* + * Force a full rescan after ioctl(BLKRRPART). While the disk state has + * nothing to do with partitions, BLKRRPART is used to force a full + * revalidate after things like a format for historical reasons. + */ + return test_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); +} + /** * sd_open - open a scsi disk device * @bdev: Block device of the scsi disk to open @@ -1425,10 +1441,8 @@ static int sd_open(struct block_device *bdev, fmode_t mode) if (!scsi_block_when_processing_errors(sdev)) goto error_out; - if (sdev->removable || sdkp->write_prot) { - if (bdev_check_media_change(bdev)) - sd_revalidate_disk(bdev->bd_disk); - } + if (sd_need_revalidate(bdev, sdkp)) + sd_revalidate_disk(bdev->bd_disk); /* * If the drive is empty, just let the open fail. 
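Note on the stm32 pinctrl hunk above: instead of assuming one gpio-ranges entry per bank, the probe now reads the first entry for the bank base and then keeps calling of_parse_phandle_with_fixed_args() with an increasing index, accumulating the third argument (the pin count) of every entry. A plain C sketch of that accumulation over already-parsed entries (the 3-cell layout mirrors a gpio-ranges tuple; the sample values and struct name are invented):

#include <stdio.h>

/* One parsed "gpio-ranges" entry: <&pinctrl gpio_offset pin_base npins>. */
struct gpio_range_args {
	unsigned int gpio_offset;
	unsigned int pin_base;
	unsigned int npins;
};

int main(void)
{
	/* A bank described by two ranges, e.g. a hole in the pin numbering. */
	const struct gpio_range_args ranges[] = {
		{ .gpio_offset = 0, .pin_base = 32, .npins = 6 },
		{ .gpio_offset = 8, .pin_base = 40, .npins = 8 },
	};
	unsigned int i, npins = 0;

	for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++)
		npins += ranges[i].npins;

	/* 14 usable pins in total, instead of a hard-coded 16 per bank. */
	printf("bank_nr from first entry: %u\n", ranges[0].pin_base / 16);
	printf("total npins: %u\n", npins);
	return 0;
}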
diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c index ab9035662717a..bcc0b5a3a459c 100644 --- a/drivers/spi/spi-nxp-fspi.c +++ b/drivers/spi/spi-nxp-fspi.c @@ -1033,12 +1033,6 @@ static int nxp_fspi_probe(struct platform_device *pdev) goto err_put_ctrl; } - /* Clear potential interrupts */ - reg = fspi_readl(f, f->iobase + FSPI_INTR); - if (reg) - fspi_writel(f, reg, f->iobase + FSPI_INTR); - - /* find the resources - controller memory mapped space */ if (is_acpi_node(f->dev->fwnode)) res = platform_get_resource(pdev, IORESOURCE_MEM, 1); @@ -1076,6 +1070,11 @@ static int nxp_fspi_probe(struct platform_device *pdev) } } + /* Clear potential interrupts */ + reg = fspi_readl(f, f->iobase + FSPI_INTR); + if (reg) + fspi_writel(f, reg, f->iobase + FSPI_INTR); + /* find the irq */ ret = platform_get_irq(pdev, 0); if (ret < 0) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 35c83f65475b2..8b0507f69c156 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1302,6 +1302,45 @@ ceph_find_incompatible(struct page *page) return NULL; } +/** + * prep_noread_page - prep a page for writing without reading first + * @page: page being prepared + * @pos: starting position for the write + * @len: length of write + * + * In some cases, write_begin doesn't need to read at all: + * - full page write + * - file is currently zero-length + * - write that lies in a page that is completely beyond EOF + * - write that covers the the page from start to EOF or beyond it + * + * If any of these criteria are met, then zero out the unwritten parts + * of the page and return true. Otherwise, return false. + */ +static bool skip_page_read(struct page *page, loff_t pos, size_t len) +{ + struct inode *inode = page->mapping->host; + loff_t i_size = i_size_read(inode); + size_t offset = offset_in_page(pos); + + /* Full page write */ + if (offset == 0 && len >= PAGE_SIZE) + return true; + + /* pos beyond last page in the file */ + if (pos - offset >= i_size) + goto zero_out; + + /* write that covers the whole page from start to EOF or beyond it */ + if (offset == 0 && (pos + len) >= i_size) + goto zero_out; + + return false; +zero_out: + zero_user_segments(page, 0, offset, offset + len, PAGE_SIZE); + return true; +} + /* * We are only allowed to write into/dirty the page if the page is * clean, or already dirty within the same snap context. @@ -1315,7 +1354,6 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, struct ceph_snap_context *snapc; struct page *page = NULL; pgoff_t index = pos >> PAGE_SHIFT; - int pos_in_page = pos & ~PAGE_MASK; int r = 0; dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len); @@ -1350,19 +1388,9 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, break; } - /* - * In some cases we don't need to read at all: - * - full page write - * - write that lies completely beyond EOF - * - write that covers the the page from start to EOF or beyond it - */ - if ((pos_in_page == 0 && len == PAGE_SIZE) || - (pos >= i_size_read(inode)) || - (pos_in_page == 0 && (pos + len) >= i_size_read(inode))) { - zero_user_segments(page, 0, pos_in_page, - pos_in_page + len, PAGE_SIZE); + /* No need to read in some cases */ + if (skip_page_read(page, pos, len)) break; - } /* * We need to read it. 
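Note on the ceph write_begin change above: the new skip_page_read() helper decides whether the page must be read before being partially overwritten; in the cases where it can be skipped, the helper also zeroes the parts of the page the write will not touch. A standalone sketch of the same decision on plain numbers (PAGE_SIZE and the helper name are the only kernel details carried over; this version only returns the verdict and does no zeroing):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Mirror of the criteria in skip_page_read(), on plain integers. */
static bool can_skip_read(unsigned long long pos, unsigned long len,
			  unsigned long long i_size)
{
	unsigned long offset = pos & (PAGE_SIZE - 1);	/* offset_in_page(pos) */

	if (offset == 0 && len >= PAGE_SIZE)
		return true;			/* full page write */
	if (pos - offset >= i_size)
		return true;			/* page lies entirely beyond EOF */
	if (offset == 0 && pos + len >= i_size)
		return true;			/* covers page start through EOF */
	return false;
}

int main(void)
{
	/* 100-byte write at offset 0 of a 50-byte file: covers start..EOF. */
	printf("%d\n", can_skip_read(0, 100, 50));		/* 1 */
	/* 100-byte write at offset 100 of a 1 MiB file: must read first. */
	printf("%d\n", can_skip_read(100, 100, 1ULL << 20));	/* 0 */
	return 0;
}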
If we get back -EINPROGRESS, then the page was diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 209535d5b8d38..3d2e3dd4ee01d 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -578,6 +578,7 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, struct ceph_inode_info *ci = ceph_inode(dir); struct inode *inode; struct timespec64 now; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_vino vino = { .ino = req->r_deleg_ino, .snap = CEPH_NOSNAP }; @@ -615,8 +616,10 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, ceph_file_layout_to_legacy(lo, &in.layout); + down_read(&mdsc->snap_rwsem); ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session, req->r_fmode, NULL); + up_read(&mdsc->snap_rwsem); if (ret) { dout("%s failed to fill inode: %d\n", __func__, ret); ceph_dir_clear_complete(dir); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 346fcdfcd3e91..57cd78e942c08 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -762,6 +762,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, bool new_version = false; bool fill_inline = false; + lockdep_assert_held(&mdsc->snap_rwsem); + dout("%s %p ino %llx.%llx v %llu had %llu\n", __func__, inode, ceph_vinop(inode), le64_to_cpu(info->version), ci->i_version); diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 303d71430bdd1..9c6c0e2e5880a 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -1053,6 +1053,7 @@ void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs) nilfs_sysfs_delete_superblock_group(nilfs); nilfs_sysfs_delete_segctor_group(nilfs); kobject_del(&nilfs->ns_dev_kobj); + kobject_put(&nilfs->ns_dev_kobj); kfree(nilfs->ns_dev_subgroups); } diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h index fb8b07daa9d15..875e002a41804 100644 --- a/include/keys/system_keyring.h +++ b/include/keys/system_keyring.h @@ -31,6 +31,7 @@ extern int restrict_link_by_builtin_and_secondary_trusted( #define restrict_link_by_builtin_and_secondary_trusted restrict_link_by_builtin_trusted #endif +extern struct pkcs7_message *pkcs7; #ifdef CONFIG_SYSTEM_BLACKLIST_KEYRING extern int mark_hash_blacklisted(const char *hash); extern int is_hash_blacklisted(const u8 *hash, size_t hash_len, @@ -49,6 +50,20 @@ static inline int is_binary_blacklisted(const u8 *hash, size_t hash_len) } #endif +#ifdef CONFIG_SYSTEM_REVOCATION_LIST +extern int add_key_to_revocation_list(const char *data, size_t size); +extern int is_key_on_revocation_list(struct pkcs7_message *pkcs7); +#else +static inline int add_key_to_revocation_list(const char *data, size_t size) +{ + return 0; +} +static inline int is_key_on_revocation_list(struct pkcs7_message *pkcs7) +{ + return -ENOKEY; +} +#endif + #ifdef CONFIG_IMA_BLACKLIST_KEYRING extern struct key *ima_blacklist_keyring; diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 2915f56ad4214..edb5c186b0b7a 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -27,8 +27,10 @@ extern int debug_locks_off(void); int __ret = 0; \ \ if (!oops_in_progress && unlikely(c)) { \ + instrumentation_begin(); \ if (debug_locks_off() && !debug_locks_silent) \ WARN(1, "DEBUG_LOCKS_WARN_ON(%s)", #c); \ + instrumentation_end(); \ __ret = 1; \ } \ __ret; \ diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 0365aa97f8e73..ff55be0117397 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -297,6 +297,7 @@ struct page *follow_devmap_pud(struct 
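Note on the system_keyring.h hunk above: it follows the usual kernel header pattern of pairing real prototypes under CONFIG_SYSTEM_REVOCATION_LIST with static inline fallbacks (0, or -ENOKEY for "not found") so callers never need #ifdefs of their own. A generic, compilable sketch of that idiom on a Linux host (FEATURE_X and the function names are made up for illustration):

#include <errno.h>
#include <stdio.h>

/* #define FEATURE_X 1 */

#ifdef FEATURE_X
/* Real implementations would live in a separate .c file. */
int feature_x_add(const char *data, unsigned long size);
int feature_x_lookup(const char *key);
#else
/* Fallbacks: succeed as a no-op, or report "not on the list". */
static inline int feature_x_add(const char *data, unsigned long size)
{
	(void)data; (void)size;
	return 0;
}
static inline int feature_x_lookup(const char *key)
{
	(void)key;
	return -ENOKEY;
}
#endif

int main(void)
{
	/* Callers are written once; the #ifdef lives in the header only. */
	if (feature_x_add("blob", 4) == 0)
		printf("add ok (or compiled out)\n");
	if (feature_x_lookup("cert") == -ENOKEY)
		printf("lookup: not on the list\n");
	return 0;
}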
vm_area_struct *vma, unsigned long addr, extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd); extern struct page *huge_zero_page; +extern unsigned long huge_zero_pfn; static inline bool is_huge_zero_page(struct page *page) { @@ -305,7 +306,7 @@ static inline bool is_huge_zero_page(struct page *page) static inline bool is_huge_zero_pmd(pmd_t pmd) { - return is_huge_zero_page(pmd_page(pmd)); + return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd); } static inline bool is_huge_zero_pud(pud_t pud) @@ -451,6 +452,11 @@ static inline bool is_huge_zero_page(struct page *page) return false; } +static inline bool is_huge_zero_pmd(pmd_t pmd) +{ + return false; +} + static inline bool is_huge_zero_pud(pud_t pud) { return false; diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index b5807f23caf80..5b68c9787f7c2 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -628,17 +628,6 @@ static inline int hstate_index(struct hstate *h) return h - hstates; } -pgoff_t __basepage_index(struct page *page); - -/* Return page->index in PAGE_SIZE units */ -static inline pgoff_t basepage_index(struct page *page) -{ - if (!PageCompound(page)) - return page->index; - - return __basepage_index(page); -} - extern int dissolve_free_huge_page(struct page *page); extern int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn); @@ -871,11 +860,6 @@ static inline int hstate_index(struct hstate *h) return 0; } -static inline pgoff_t basepage_index(struct page *page) -{ - return page->index; -} - static inline int dissolve_free_huge_page(struct page *page) { return 0; diff --git a/include/linux/mm.h b/include/linux/mm.h index 5106db3ad1ce3..289c26f055cdd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1648,6 +1648,7 @@ struct zap_details { struct address_space *check_mapping; /* Check page->mapping if set */ pgoff_t first_index; /* Lowest page->index to unmap */ pgoff_t last_index; /* Highest page->index to unmap */ + struct page *single_page; /* Locked page to be unmapped */ }; struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, @@ -1695,6 +1696,7 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma, extern int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked); +void unmap_mapping_page(struct page *page); void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows); void unmap_mapping_range(struct address_space *mapping, @@ -1715,6 +1717,7 @@ static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address, BUG(); return -EFAULT; } +static inline void unmap_mapping_page(struct page *page) { } static inline void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows) { } static inline void unmap_mapping_range(struct address_space *mapping, diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 2ad72d2c8cc52..5d0767cb424aa 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -37,6 +37,18 @@ void dump_mm(const struct mm_struct *mm); BUG(); \ } \ } while (0) +#define VM_WARN_ON_ONCE_PAGE(cond, page) ({ \ + static bool __section(".data.once") __warned; \ + int __ret_warn_once = !!(cond); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ + dump_page(page, "VM_WARN_ON_ONCE_PAGE(" __stringify(cond)")");\ + __warned = true; \ + WARN_ON(1); \ + } \ + unlikely(__ret_warn_once); \ +}) + #define VM_WARN_ON(cond) 
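Note on the VM_WARN_ON_ONCE_PAGE() macro introduced above: it keeps a per-call-site static flag (placed in .data.once) so the noisy page dump fires only on the first occurrence, while the statement expression still yields whether the condition was true. A plain C analogue of the same pattern, using the GCC/Clang ({ ... }) extension just as the kernel does (WARN_ONCE_RET and the message are illustrative):

#include <stdio.h>

/*
 * Evaluate cond; on the first true evaluation at this call site, print a
 * warning, then stay quiet. The whole expression still "returns" whether
 * cond was true, so it can sit directly inside an if ().
 */
#define WARN_ONCE_RET(cond, msg) ({				\
	static int __warned;					\
	int __ret = !!(cond);					\
	if (__ret && !__warned) {				\
		__warned = 1;					\
		fprintf(stderr, "warning: %s\n", msg);		\
	}							\
	__ret;							\
})

int main(void)
{
	for (int i = 0; i < 3; i++) {
		if (WARN_ONCE_RET(i >= 1, "i went out of range"))
			printf("iteration %d: condition hit\n", i);
	}
	return 0;
}

The warning prints once (at i == 1), but the condition is still reported for every later iteration.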
(void)WARN_ON(cond) #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) #define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format) @@ -48,6 +60,7 @@ void dump_mm(const struct mm_struct *mm); #define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond) #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) +#define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) #endif diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index b032f094a7827..fcb3f040102af 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -501,7 +501,7 @@ static inline struct page *read_mapping_page(struct address_space *mapping, } /* - * Get index of the page with in radix-tree + * Get index of the page within radix-tree (but not for hugetlb pages). * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE) */ static inline pgoff_t page_to_index(struct page *page) @@ -520,15 +520,16 @@ static inline pgoff_t page_to_index(struct page *page) return pgoff; } +extern pgoff_t hugetlb_basepage_index(struct page *page); + /* - * Get the offset in PAGE_SIZE. - * (TODO: hugepage should have ->index in PAGE_SIZE) + * Get the offset in PAGE_SIZE (even for hugetlb pages). + * (TODO: hugetlb pages should have ->index in PAGE_SIZE) */ static inline pgoff_t page_to_pgoff(struct page *page) { - if (unlikely(PageHeadHuge(page))) - return page->index << compound_order(page); - + if (unlikely(PageHuge(page))) + return hugetlb_basepage_index(page); return page_to_index(page); } diff --git a/include/linux/rmap.h b/include/linux/rmap.h index def5c62c93b3b..8d04e7deedc66 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -91,6 +91,7 @@ enum ttu_flags { TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */ TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */ + TTU_SYNC = 0x10, /* avoid racy checks with PVMW_SYNC */ TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */ TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible * and caller guarantees they will diff --git a/include/net/sock.h b/include/net/sock.h index f68184b8c0aa5..3c7addf951509 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1900,7 +1900,8 @@ static inline u32 net_tx_rndhash(void) static inline void sk_set_txhash(struct sock *sk) { - sk->sk_txhash = net_tx_rndhash(); + /* This pairs with READ_ONCE() in skb_set_hash_from_sk() */ + WRITE_ONCE(sk->sk_txhash, net_tx_rndhash()); } static inline bool sk_rethink_txhash(struct sock *sk) @@ -2172,9 +2173,12 @@ static inline void sock_poll_wait(struct file *filp, struct socket *sock, static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk) { - if (sk->sk_txhash) { + /* This pairs with WRITE_ONCE() in sk_set_txhash() */ + u32 txhash = READ_ONCE(sk->sk_txhash); + + if (txhash) { skb->l4_hash = 1; - skb->hash = sk->sk_txhash; + skb->hash = txhash; } } @@ -2232,8 +2236,13 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk); static inline int sock_error(struct sock *sk) { int err; - if (likely(!sk->sk_err)) + + /* Avoid an atomic operation for the common case. + * This is racy since another cpu/thread can change sk_err under us. 
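Note on the sk_txhash hunks above: the writer now publishes the hash with WRITE_ONCE() and the lockless reader loads it once with READ_ONCE() into a local that is both tested and used, so it sees either the old or the new value but never a torn load or a value that changes between the test and the use. A userspace analogue using C11 relaxed atomics in place of the kernel macros (the field and function names are invented):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Shared, lockless field: one thread updates it, others read it. */
static _Atomic uint32_t txhash;

static void set_hash(uint32_t newhash)
{
	/* Analogue of WRITE_ONCE(sk->sk_txhash, ...). */
	atomic_store_explicit(&txhash, newhash, memory_order_relaxed);
}

static void use_hash(void)
{
	/* Analogue of READ_ONCE(): read once into a local... */
	uint32_t h = atomic_load_explicit(&txhash, memory_order_relaxed);

	/* ...then test and use the same snapshot, never re-reading. */
	if (h)
		printf("stamping skb with hash %#x\n", (unsigned)h);
	else
		printf("no hash set yet\n");
}

int main(void)
{
	use_hash();
	set_hash(0x1234abcd);
	use_hash();
	return 0;
}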
+ */ + if (likely(data_race(!sk->sk_err))) return 0; + err = xchg(&sk->sk_err, 0); return -err; } diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 0f61b14b00995..0ed0e1f215c75 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -667,6 +667,9 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, if (orig_addr == INVALID_PHYS_ADDR) return; + orig_addr += (tlb_addr & (IO_TLB_SIZE - 1)) - + swiotlb_align_offset(hwdev, orig_addr); + switch (target) { case SYNC_FOR_CPU: if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) diff --git a/kernel/futex.c b/kernel/futex.c index 3136aba177720..98a6e1b80bfe4 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -652,7 +651,7 @@ again: key->both.offset |= FUT_OFF_INODE; /* inode-based key */ key->shared.i_seq = get_inode_sequence_number(inode); - key->shared.pgoff = basepage_index(tail); + key->shared.pgoff = page_to_pgoff(tail); rcu_read_unlock(); } diff --git a/kernel/kthread.c b/kernel/kthread.c index 5edf7e19ab262..36be4364b313a 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1044,8 +1044,38 @@ void kthread_flush_work(struct kthread_work *work) EXPORT_SYMBOL_GPL(kthread_flush_work); /* - * This function removes the work from the worker queue. Also it makes sure - * that it won't get queued later via the delayed work's timer. + * Make sure that the timer is neither set nor running and could + * not manipulate the work list_head any longer. + * + * The function is called under worker->lock. The lock is temporary + * released but the timer can't be set again in the meantime. + */ +static void kthread_cancel_delayed_work_timer(struct kthread_work *work, + unsigned long *flags) +{ + struct kthread_delayed_work *dwork = + container_of(work, struct kthread_delayed_work, work); + struct kthread_worker *worker = work->worker; + + /* + * del_timer_sync() must be called to make sure that the timer + * callback is not running. The lock must be temporary released + * to avoid a deadlock with the callback. In the meantime, + * any queuing is blocked by setting the canceling counter. + */ + work->canceling++; + raw_spin_unlock_irqrestore(&worker->lock, *flags); + del_timer_sync(&dwork->timer); + raw_spin_lock_irqsave(&worker->lock, *flags); + work->canceling--; +} + +/* + * This function removes the work from the worker queue. + * + * It is called under worker->lock. The caller must make sure that + * the timer used by delayed work is not running, e.g. by calling + * kthread_cancel_delayed_work_timer(). * * The work might still be in use when this function finishes. See the * current_work proceed by the worker. @@ -1053,28 +1083,8 @@ EXPORT_SYMBOL_GPL(kthread_flush_work); * Return: %true if @work was pending and successfully canceled, * %false if @work was not pending */ -static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork, - unsigned long *flags) +static bool __kthread_cancel_work(struct kthread_work *work) { - /* Try to cancel the timer if exists. */ - if (is_dwork) { - struct kthread_delayed_work *dwork = - container_of(work, struct kthread_delayed_work, work); - struct kthread_worker *worker = work->worker; - - /* - * del_timer_sync() must be called to make sure that the timer - * callback is not running. The lock must be temporary released - * to avoid a deadlock with the callback. In the meantime, - * any queuing is blocked by setting the canceling counter. 
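Note on the sock_error() change above: the common path is a cheap racy peek at sk_err (annotated with data_race()), and only when an error appears pending does it fall back to xchg(&sk->sk_err, 0), which atomically fetches the value and clears it so the error is reported exactly once. A small C11 analogue of that fetch-and-clear step (the error value and names are invented):

#include <stdatomic.h>
#include <stdio.h>

static _Atomic int pending_err;

/* Analogue of sock_error(): return 0, or -err while clearing it. */
static int take_error(void)
{
	/* Cheap racy peek first: most calls find no error pending. */
	if (atomic_load_explicit(&pending_err, memory_order_relaxed) == 0)
		return 0;

	/* Atomically grab the value and reset it, like xchg(&sk_err, 0). */
	int err = atomic_exchange(&pending_err, 0);

	return -err;
}

int main(void)
{
	atomic_store(&pending_err, 111);
	printf("first call:  %d\n", take_error());	/* -111 */
	printf("second call: %d\n", take_error());	/* 0, already consumed */
	return 0;
}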
- */ - work->canceling++; - raw_spin_unlock_irqrestore(&worker->lock, *flags); - del_timer_sync(&dwork->timer); - raw_spin_lock_irqsave(&worker->lock, *flags); - work->canceling--; - } - /* * Try to remove the work from a worker list. It might either * be from worker->work_list or from worker->delayed_work_list. @@ -1127,11 +1137,23 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker, /* Work must not be used with >1 worker, see kthread_queue_work() */ WARN_ON_ONCE(work->worker != worker); - /* Do not fight with another command that is canceling this work. */ + /* + * Temporary cancel the work but do not fight with another command + * that is canceling the work as well. + * + * It is a bit tricky because of possible races with another + * mod_delayed_work() and cancel_delayed_work() callers. + * + * The timer must be canceled first because worker->lock is released + * when doing so. But the work can be removed from the queue (list) + * only when it can be queued again so that the return value can + * be used for reference counting. + */ + kthread_cancel_delayed_work_timer(work, &flags); if (work->canceling) goto out; + ret = __kthread_cancel_work(work); - ret = __kthread_cancel_work(work, true, &flags); fast_queue: __kthread_queue_delayed_work(worker, dwork, delay); out: @@ -1153,7 +1175,10 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork) /* Work must not be used with >1 worker, see kthread_queue_work(). */ WARN_ON_ONCE(work->worker != worker); - ret = __kthread_cancel_work(work, is_dwork, &flags); + if (is_dwork) + kthread_cancel_delayed_work_timer(work, &flags); + + ret = __kthread_cancel_work(work); if (worker->current_work != work) goto out_fast; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 858b96b438cee..cdca007551e71 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -842,7 +842,7 @@ static int count_matching_names(struct lock_class *new_class) } /* used from NMI context -- must be lockless */ -static __always_inline struct lock_class * +static noinstr struct lock_class * look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass) { struct lockdep_subclass_key *key; @@ -850,12 +850,14 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass) struct lock_class *class; if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { + instrumentation_begin(); debug_locks_off(); printk(KERN_ERR "BUG: looking up invalid subclass: %u\n", subclass); printk(KERN_ERR "turning off the locking correctness validator.\n"); dump_stack(); + instrumentation_end(); return NULL; } diff --git a/kernel/module.c b/kernel/module.c index 908d46abe1656..185b2655bc206 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -272,9 +272,18 @@ static void module_assert_mutex_or_preempt(void) #endif } +#ifdef CONFIG_MODULE_SIG static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); module_param(sig_enforce, bool_enable_only, 0644); +void set_module_sig_enforced(void) +{ + sig_enforce = true; +} +#else +#define sig_enforce false +#endif + /* * Export sig_enforce kernel cmdline parameter to allow other subsystems rely * on that instead of directly to CONFIG_MODULE_SIG_FORCE config. @@ -285,11 +294,6 @@ bool is_module_sig_enforced(void) } EXPORT_SYMBOL(is_module_sig_enforced); -void set_module_sig_enforced(void) -{ - sig_enforce = true; -} - /* Block module loading/unloading? 
*/ int modules_disabled = 0; core_param(nomodule, modules_disabled, bint, 0); diff --git a/lib/debug_locks.c b/lib/debug_locks.c index 06d3135bd184c..a75ee30b77cb8 100644 --- a/lib/debug_locks.c +++ b/lib/debug_locks.c @@ -36,7 +36,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent); /* * Generic 'turn off all lock debugging' function: */ -noinstr int debug_locks_off(void) +int debug_locks_off(void) { if (debug_locks && __debug_locks_off()) { if (!debug_locks_silent) { diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d9ade23ac2b22..6301ecc1f679a 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -61,6 +61,7 @@ static struct shrinker deferred_split_shrinker; static atomic_t huge_zero_refcount; struct page *huge_zero_page __read_mostly; +unsigned long huge_zero_pfn __read_mostly = ~0UL; bool transparent_hugepage_enabled(struct vm_area_struct *vma) { @@ -97,6 +98,7 @@ retry: __free_pages(zero_page, compound_order(zero_page)); goto retry; } + WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page)); /* We take additional reference here. It will be put back by shrinker */ atomic_set(&huge_zero_refcount, 2); @@ -146,6 +148,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink, if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { struct page *zero_page = xchg(&huge_zero_page, NULL); BUG_ON(zero_page == NULL); + WRITE_ONCE(huge_zero_pfn, ~0UL); __free_pages(zero_page, compound_order(zero_page)); return HPAGE_PMD_NR; } @@ -2031,7 +2034,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, count_vm_event(THP_SPLIT_PMD); if (!vma_is_anonymous(vma)) { - _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd); + old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd); /* * We are going to unmap this huge page. So * just go ahead and zap it @@ -2040,16 +2043,25 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, zap_deposited_table(mm, pmd); if (vma_is_special_huge(vma)) return; - page = pmd_page(_pmd); - if (!PageDirty(page) && pmd_dirty(_pmd)) - set_page_dirty(page); - if (!PageReferenced(page) && pmd_young(_pmd)) - SetPageReferenced(page); - page_remove_rmap(page, true); - put_page(page); + if (unlikely(is_pmd_migration_entry(old_pmd))) { + swp_entry_t entry; + + entry = pmd_to_swp_entry(old_pmd); + page = migration_entry_to_page(entry); + } else { + page = pmd_page(old_pmd); + if (!PageDirty(page) && pmd_dirty(old_pmd)) + set_page_dirty(page); + if (!PageReferenced(page) && pmd_young(old_pmd)) + SetPageReferenced(page); + page_remove_rmap(page, true); + put_page(page); + } add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR); return; - } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) { + } + + if (is_huge_zero_pmd(*pmd)) { /* * FIXME: Do we want to invalidate secondary mmu by calling * mmu_notifier_invalidate_range() see comments below inside @@ -2330,17 +2342,17 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, static void unmap_page(struct page *page) { - enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | + enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD; - bool unmap_success; VM_BUG_ON_PAGE(!PageHead(page), page); if (PageAnon(page)) ttu_flags |= TTU_SPLIT_FREEZE; - unmap_success = try_to_unmap(page, ttu_flags); - VM_BUG_ON_PAGE(!unmap_success, page); + try_to_unmap(page, ttu_flags); + + VM_WARN_ON_ONCE_PAGE(page_mapped(page), page); } static void remap_page(struct page *page, unsigned int nr) @@ -2630,7 +2642,7 @@ int split_huge_page_to_list(struct page *page, 
struct list_head *list) struct deferred_split *ds_queue = get_deferred_split_queue(head); struct anon_vma *anon_vma = NULL; struct address_space *mapping = NULL; - int count, mapcount, extra_pins, ret; + int extra_pins, ret; unsigned long flags; pgoff_t end; @@ -2690,7 +2702,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) } unmap_page(head); - VM_BUG_ON_PAGE(compound_mapcount(head), head); /* prevent PageLRU to go away from under us, and freeze lru stats */ spin_lock_irqsave(&pgdata->lru_lock, flags); @@ -2709,9 +2720,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) /* Prevent deferred_split_scan() touching ->_refcount */ spin_lock(&ds_queue->split_queue_lock); - count = page_count(head); - mapcount = total_mapcount(head); - if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) { + if (page_ref_freeze(head, 1 + extra_pins)) { if (!list_empty(page_deferred_list(head))) { ds_queue->split_queue_len--; list_del(page_deferred_list(head)); @@ -2727,16 +2736,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) __split_huge_page(page, list, end, flags); ret = 0; } else { - if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { - pr_alert("total_mapcount: %u, page_count(): %u\n", - mapcount, count); - if (PageTail(page)) - dump_page(head, NULL); - dump_page(page, "total_mapcount(head) > 0"); - BUG(); - } spin_unlock(&ds_queue->split_queue_lock); -fail: if (mapping) +fail: + if (mapping) xa_unlock(&mapping->i_pages); spin_unlock_irqrestore(&pgdata->lru_lock, flags); remap_page(head, thp_nr_pages(head)); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index bc1006a327338..d4f89c2f95446 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1635,15 +1635,12 @@ struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage) return NULL; } -pgoff_t __basepage_index(struct page *page) +pgoff_t hugetlb_basepage_index(struct page *page) { struct page *page_head = compound_head(page); pgoff_t index = page_index(page_head); unsigned long compound_idx; - if (!PageHuge(page_head)) - return page_index(page); - if (compound_order(page_head) >= MAX_ORDER) compound_idx = page_to_pfn(page) - page_to_pfn(page_head); else diff --git a/mm/internal.h b/mm/internal.h index c43ccdddb0f6e..840b8a330b9ac 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -379,27 +379,52 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page) extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); /* - * At what user virtual address is page expected in @vma? + * At what user virtual address is page expected in vma? + * Returns -EFAULT if all of the page is outside the range of vma. + * If page is a compound head, the entire compound page is considered. */ static inline unsigned long -__vma_address(struct page *page, struct vm_area_struct *vma) +vma_address(struct page *page, struct vm_area_struct *vma) { - pgoff_t pgoff = page_to_pgoff(page); - return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); + pgoff_t pgoff; + unsigned long address; + + VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */ + pgoff = page_to_pgoff(page); + if (pgoff >= vma->vm_pgoff) { + address = vma->vm_start + + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); + /* Check for address beyond vma (or wrapped through 0?) 
*/ + if (address < vma->vm_start || address >= vma->vm_end) + address = -EFAULT; + } else if (PageHead(page) && + pgoff + compound_nr(page) - 1 >= vma->vm_pgoff) { + /* Test above avoids possibility of wrap to 0 on 32-bit */ + address = vma->vm_start; + } else { + address = -EFAULT; + } + return address; } +/* + * Then at what user virtual address will none of the page be found in vma? + * Assumes that vma_address() already returned a good starting address. + * If page is a compound head, the entire compound page is considered. + */ static inline unsigned long -vma_address(struct page *page, struct vm_area_struct *vma) +vma_address_end(struct page *page, struct vm_area_struct *vma) { - unsigned long start, end; - - start = __vma_address(page, vma); - end = start + thp_size(page) - PAGE_SIZE; - - /* page should be within @vma mapping range */ - VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma); - - return max(start, vma->vm_start); + pgoff_t pgoff; + unsigned long address; + + VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */ + pgoff = page_to_pgoff(page) + compound_nr(page); + address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); + /* Check for address beyond vma (or wrapped through 0?) */ + if (address < vma->vm_start || address > vma->vm_end) + address = vma->vm_end; + return address; } static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, diff --git a/mm/memory.c b/mm/memory.c index b70bd3ba33888..eb31b3e4ef93b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1355,7 +1355,18 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, else if (zap_huge_pmd(tlb, vma, pmd, addr)) goto next; /* fall through */ + } else if (details && details->single_page && + PageTransCompound(details->single_page) && + next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { + spinlock_t *ptl = pmd_lock(tlb->mm, pmd); + /* + * Take and drop THP pmd lock so that we cannot return + * prematurely, while zap_huge_pmd() has cleared *pmd, + * but not yet decremented compound_mapcount(). + */ + spin_unlock(ptl); } + /* * Here there can be other concurrent MADV_DONTNEED or * trans huge page faults running, and if the pmd is @@ -3185,6 +3196,36 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root, } } +/** + * unmap_mapping_page() - Unmap single page from processes. + * @page: The locked page to be unmapped. + * + * Unmap this page from any userspace process which still has it mmaped. + * Typically, for efficiency, the range of nearby pages has already been + * unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once + * truncation or invalidation holds the lock on a page, it may find that + * the page has been remapped again: and then uses unmap_mapping_page() + * to unmap it finally. + */ +void unmap_mapping_page(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct zap_details details = { }; + + VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON(PageTail(page)); + + details.check_mapping = mapping; + details.first_index = page->index; + details.last_index = page->index + thp_nr_pages(page) - 1; + details.single_page = page; + + i_mmap_lock_write(mapping); + if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) + unmap_mapping_range_tree(&mapping->i_mmap, &details); + i_mmap_unlock_write(mapping); +} + /** * unmap_mapping_pages() - Unmap pages from processes. * @mapping: The address space containing pages to be unmapped. 
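Note on the reworked vma_address()/vma_address_end() above: the user virtual address is derived from the page's file offset relative to the vma's vm_pgoff, and addresses that land outside [vm_start, vm_end) are reported as -EFAULT (or clamped to vm_end for the end helper) instead of being asserted on. A worked userspace sketch of the core arithmetic with sample numbers (field names mirror the kernel; the values are invented, and 0 stands in for the kernel's -EFAULT):

#include <stdio.h>

#define PAGE_SHIFT 12

struct demo_vma {
	unsigned long vm_start, vm_end;	/* user VAs covered by the mapping */
	unsigned long vm_pgoff;		/* file offset of vm_start, in pages */
};

/* Simplified vma_address(): VA of file page 'pgoff', or 0 if unmapped. */
static unsigned long addr_of_pgoff(const struct demo_vma *vma,
				   unsigned long pgoff)
{
	unsigned long addr;

	if (pgoff < vma->vm_pgoff)
		return 0;
	addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
	if (addr < vma->vm_start || addr >= vma->vm_end)
		return 0;
	return addr;
}

int main(void)
{
	/* Maps file pages 16..31 at 0x700000000000..+64K. */
	struct demo_vma vma = {
		.vm_start = 0x700000000000UL,
		.vm_end   = 0x700000010000UL,
		.vm_pgoff = 16,
	};

	printf("pgoff 16 -> %#lx\n", addr_of_pgoff(&vma, 16)); /* vm_start */
	printf("pgoff 20 -> %#lx\n", addr_of_pgoff(&vma, 20)); /* +4 pages */
	printf("pgoff 40 -> %#lx\n", addr_of_pgoff(&vma, 40)); /* 0: outside */
	return 0;
}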
diff --git a/mm/migrate.c b/mm/migrate.c index 7982256a51250..278e6f3fa62ce 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -326,6 +326,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, goto out; page = migration_entry_to_page(entry); + page = compound_head(page); /* * Once page cache replacement of page migration started, page_count diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 5e77b269c330a..610ebbee787cc 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -115,6 +115,13 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw) return pfn_is_match(pvmw->page, pfn); } +static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size) +{ + pvmw->address = (pvmw->address + size) & ~(size - 1); + if (!pvmw->address) + pvmw->address = ULONG_MAX; +} + /** * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at * @pvmw->address @@ -143,6 +150,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) { struct mm_struct *mm = pvmw->vma->vm_mm; struct page *page = pvmw->page; + unsigned long end; pgd_t *pgd; p4d_t *p4d; pud_t *pud; @@ -152,10 +160,11 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) if (pvmw->pmd && !pvmw->pte) return not_found(pvmw); - if (pvmw->pte) - goto next_pte; + if (unlikely(PageHuge(page))) { + /* The only possible mapping was handled on last iteration */ + if (pvmw->pte) + return not_found(pvmw); - if (unlikely(PageHuge(pvmw->page))) { /* when pud is not present, pte will be NULL */ pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page)); if (!pvmw->pte) @@ -167,78 +176,108 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) return not_found(pvmw); return true; } -restart: - pgd = pgd_offset(mm, pvmw->address); - if (!pgd_present(*pgd)) - return false; - p4d = p4d_offset(pgd, pvmw->address); - if (!p4d_present(*p4d)) - return false; - pud = pud_offset(p4d, pvmw->address); - if (!pud_present(*pud)) - return false; - pvmw->pmd = pmd_offset(pud, pvmw->address); + /* - * Make sure the pmd value isn't cached in a register by the - * compiler and used as a stale value after we've observed a - * subsequent update. + * Seek to next pte only makes sense for THP. + * But more important than that optimization, is to filter out + * any PageKsm page: whose page->index misleads vma_address() + * and vma_address_end() to disaster. */ - pmde = READ_ONCE(*pvmw->pmd); - if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { - pvmw->ptl = pmd_lock(mm, pvmw->pmd); - if (likely(pmd_trans_huge(*pvmw->pmd))) { - if (pvmw->flags & PVMW_MIGRATION) - return not_found(pvmw); - if (pmd_page(*pvmw->pmd) != page) - return not_found(pvmw); - return true; - } else if (!pmd_present(*pvmw->pmd)) { - if (thp_migration_supported()) { - if (!(pvmw->flags & PVMW_MIGRATION)) + end = PageTransCompound(page) ? + vma_address_end(page, pvmw->vma) : + pvmw->address + PAGE_SIZE; + if (pvmw->pte) + goto next_pte; +restart: + do { + pgd = pgd_offset(mm, pvmw->address); + if (!pgd_present(*pgd)) { + step_forward(pvmw, PGDIR_SIZE); + continue; + } + p4d = p4d_offset(pgd, pvmw->address); + if (!p4d_present(*p4d)) { + step_forward(pvmw, P4D_SIZE); + continue; + } + pud = pud_offset(p4d, pvmw->address); + if (!pud_present(*pud)) { + step_forward(pvmw, PUD_SIZE); + continue; + } + + pvmw->pmd = pmd_offset(pud, pvmw->address); + /* + * Make sure the pmd value isn't cached in a register by the + * compiler and used as a stale value after we've observed a + * subsequent update. 
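Note on the step_forward() helper introduced above: when a whole pgd/p4d/pud/pmd level is not present, the walk skips straight to the next boundary by rounding the address up with (address + size) & ~(size - 1); a wrap past the top of the address space becomes ULONG_MAX so the "address < end" loop still terminates. A tiny demo of that rounding (the 2 MiB / 1 GiB sizes are just sample power-of-two constants):

#include <limits.h>
#include <stdio.h>

/* Round addr up to the next multiple of a power-of-two size. */
static unsigned long next_boundary(unsigned long addr, unsigned long size)
{
	addr = (addr + size) & ~(size - 1);
	if (!addr)			/* wrapped past the top of the space */
		addr = ULONG_MAX;
	return addr;
}

int main(void)
{
	unsigned long pmd_size = 1UL << 21;	/* 2 MiB */
	unsigned long pud_size = 1UL << 30;	/* 1 GiB */

	printf("%#lx\n", next_boundary(0x40123000UL, pmd_size)); /* 0x40200000 */
	printf("%#lx\n", next_boundary(0x40123000UL, pud_size)); /* 0x80000000 */
	/* Address in the last aligned block: wraps, so clamp to ULONG_MAX. */
	printf("%#lx\n", next_boundary(ULONG_MAX & ~(pmd_size - 1), pmd_size));
	return 0;
}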
+ */ + pmde = READ_ONCE(*pvmw->pmd); + + if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { + pvmw->ptl = pmd_lock(mm, pvmw->pmd); + pmde = *pvmw->pmd; + if (likely(pmd_trans_huge(pmde))) { + if (pvmw->flags & PVMW_MIGRATION) return not_found(pvmw); - if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) { - swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd); + if (pmd_page(pmde) != page) + return not_found(pvmw); + return true; + } + if (!pmd_present(pmde)) { + swp_entry_t entry; - if (migration_entry_to_page(entry) != page) - return not_found(pvmw); - return true; - } + if (!thp_migration_supported() || + !(pvmw->flags & PVMW_MIGRATION)) + return not_found(pvmw); + entry = pmd_to_swp_entry(pmde); + if (!is_migration_entry(entry) || + migration_entry_to_page(entry) != page) + return not_found(pvmw); + return true; } - return not_found(pvmw); - } else { /* THP pmd was split under us: handle on pte level */ spin_unlock(pvmw->ptl); pvmw->ptl = NULL; + } else if (!pmd_present(pmde)) { + /* + * If PVMW_SYNC, take and drop THP pmd lock so that we + * cannot return prematurely, while zap_huge_pmd() has + * cleared *pmd but not decremented compound_mapcount(). + */ + if ((pvmw->flags & PVMW_SYNC) && + PageTransCompound(page)) { + spinlock_t *ptl = pmd_lock(mm, pvmw->pmd); + + spin_unlock(ptl); + } + step_forward(pvmw, PMD_SIZE); + continue; } - } else if (!pmd_present(pmde)) { - return false; - } - if (!map_pte(pvmw)) - goto next_pte; - while (1) { + if (!map_pte(pvmw)) + goto next_pte; +this_pte: if (check_pte(pvmw)) return true; next_pte: - /* Seek to next pte only makes sense for THP */ - if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page)) - return not_found(pvmw); do { pvmw->address += PAGE_SIZE; - if (pvmw->address >= pvmw->vma->vm_end || - pvmw->address >= - __vma_address(pvmw->page, pvmw->vma) + - thp_size(pvmw->page)) + if (pvmw->address >= end) return not_found(pvmw); /* Did we cross page table boundary? 
*/ - if (pvmw->address % PMD_SIZE == 0) { - pte_unmap(pvmw->pte); + if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) { if (pvmw->ptl) { spin_unlock(pvmw->ptl); pvmw->ptl = NULL; } + pte_unmap(pvmw->pte); + pvmw->pte = NULL; goto restart; - } else { - pvmw->pte++; + } + pvmw->pte++; + if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) { + pvmw->ptl = pte_lockptr(mm, pvmw->pmd); + spin_lock(pvmw->ptl); } } while (pte_none(*pvmw->pte)); @@ -246,7 +285,10 @@ next_pte: pvmw->ptl = pte_lockptr(mm, pvmw->pmd); spin_lock(pvmw->ptl); } - } + goto this_pte; + } while (pvmw->address < end); + + return false; } /** @@ -265,14 +307,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) .vma = vma, .flags = PVMW_SYNC, }; - unsigned long start, end; - - start = __vma_address(page, vma); - end = start + thp_size(page) - PAGE_SIZE; - if (unlikely(end < vma->vm_start || start >= vma->vm_end)) + pvmw.address = vma_address(page, vma); + if (pvmw.address == -EFAULT) return 0; - pvmw.address = max(start, vma->vm_start); if (!page_vma_mapped_walk(&pvmw)) return 0; page_vma_mapped_walk_done(&pvmw); diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 9578db83e312f..4e640baf97948 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -135,8 +135,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, { pmd_t pmd; VM_BUG_ON(address & ~HPAGE_PMD_MASK); - VM_BUG_ON((pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) && - !pmd_devmap(*pmdp)) || !pmd_present(*pmdp)); + VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) && + !pmd_devmap(*pmdp)); pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; diff --git a/mm/rmap.c b/mm/rmap.c index 6657000b18d41..14f84f70c5571 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -700,7 +700,6 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) */ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) { - unsigned long address; if (PageAnon(page)) { struct anon_vma *page__anon_vma = page_anon_vma(page); /* @@ -710,15 +709,13 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) if (!vma->anon_vma || !page__anon_vma || vma->anon_vma->root != page__anon_vma->root) return -EFAULT; - } else if (page->mapping) { - if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping) - return -EFAULT; - } else + } else if (!vma->vm_file) { return -EFAULT; - address = __vma_address(page, vma); - if (unlikely(address < vma->vm_start || address >= vma->vm_end)) + } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) { return -EFAULT; - return address; + } + + return vma_address(page, vma); } pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) @@ -912,7 +909,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, */ mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 0, vma, vma->vm_mm, address, - min(vma->vm_end, address + page_size(page))); + vma_address_end(page, vma)); mmu_notifier_invalidate_range_start(&range); while (page_vma_mapped_walk(&pvmw)) { @@ -1385,6 +1382,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, struct mmu_notifier_range range; enum ttu_flags flags = (enum ttu_flags)(long)arg; + /* + * When racing against e.g. 
zap_pte_range() on another cpu, + * in between its ptep_get_and_clear_full() and page_remove_rmap(), + * try_to_unmap() may return false when it is about to become true, + * if page table locking is skipped: use TTU_SYNC to wait for that. + */ + if (flags & TTU_SYNC) + pvmw.flags = PVMW_SYNC; + /* munlock has nothing to gain from examining un-locked vmas */ if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED)) return true; @@ -1406,9 +1412,10 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * Note that the page can not be free in this function as call of * try_to_unmap() must hold a reference on the page. */ + range.end = PageKsm(page) ? + address + PAGE_SIZE : vma_address_end(page, vma); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, - address, - min(vma->vm_end, address + page_size(page))); + address, range.end); if (PageHuge(page)) { /* * If sharing is possible, start and end will be adjusted @@ -1716,9 +1723,9 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg) return vma_is_temporary_stack(vma); } -static int page_mapcount_is_zero(struct page *page) +static int page_not_mapped(struct page *page) { - return !total_mapcount(page); + return !page_mapped(page); } /** @@ -1736,7 +1743,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags) struct rmap_walk_control rwc = { .rmap_one = try_to_unmap_one, .arg = (void *)flags, - .done = page_mapcount_is_zero, + .done = page_not_mapped, .anon_lock = page_lock_anon_vma_read, }; @@ -1757,14 +1764,15 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags) else rmap_walk(page, &rwc); - return !page_mapcount(page) ? true : false; + /* + * When racing against e.g. zap_pte_range() on another cpu, + * in between its ptep_get_and_clear_full() and page_remove_rmap(), + * try_to_unmap() may return false when it is about to become true, + * if page table locking is skipped: use TTU_SYNC to wait for that. + */ + return !page_mapcount(page); } -static int page_not_mapped(struct page *page) -{ - return !page_mapped(page); -}; - /** * try_to_munlock - try to munlock a page * @page: the page to be munlocked @@ -1859,6 +1867,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc, struct vm_area_struct *vma = avc->vma; unsigned long address = vma_address(page, vma); + VM_BUG_ON_VMA(address == -EFAULT, vma); cond_resched(); if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) @@ -1913,6 +1922,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc, pgoff_start, pgoff_end) { unsigned long address = vma_address(page, vma); + VM_BUG_ON_VMA(address == -EFAULT, vma); cond_resched(); if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) diff --git a/mm/truncate.c b/mm/truncate.c index 960edf5803ca9..8914ca4ce4b1e 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -173,13 +173,10 @@ void do_invalidatepage(struct page *page, unsigned int offset, * its lock, b) when a concurrent invalidate_mapping_pages got there first and * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. 
*/ -static void -truncate_cleanup_page(struct address_space *mapping, struct page *page) +static void truncate_cleanup_page(struct page *page) { - if (page_mapped(page)) { - unsigned int nr = thp_nr_pages(page); - unmap_mapping_pages(mapping, page->index, nr, false); - } + if (page_mapped(page)) + unmap_mapping_page(page); if (page_has_private(page)) do_invalidatepage(page, 0, thp_size(page)); @@ -224,7 +221,7 @@ int truncate_inode_page(struct address_space *mapping, struct page *page) if (page->mapping != mapping) return -EIO; - truncate_cleanup_page(mapping, page); + truncate_cleanup_page(page); delete_from_page_cache(page); return 0; } @@ -362,7 +359,7 @@ void truncate_inode_pages_range(struct address_space *mapping, pagevec_add(&locked_pvec, page); } for (i = 0; i < pagevec_count(&locked_pvec); i++) - truncate_cleanup_page(mapping, locked_pvec.pages[i]); + truncate_cleanup_page(locked_pvec.pages[i]); delete_from_page_cache_batch(mapping, &locked_pvec); for (i = 0; i < pagevec_count(&locked_pvec); i++) unlock_page(locked_pvec.pages[i]); @@ -737,6 +734,16 @@ int invalidate_inode_pages2_range(struct address_space *mapping, continue; } + if (!did_range_unmap && page_mapped(page)) { + /* + * If page is mapped, before taking its lock, + * zap the rest of the file in one hit. + */ + unmap_mapping_pages(mapping, index, + (1 + end - index), false); + did_range_unmap = 1; + } + lock_page(page); WARN_ON(page_to_index(page) != index); if (page->mapping != mapping) { @@ -744,23 +751,11 @@ int invalidate_inode_pages2_range(struct address_space *mapping, continue; } wait_on_page_writeback(page); - if (page_mapped(page)) { - if (!did_range_unmap) { - /* - * Zap the rest of the file in one hit. - */ - unmap_mapping_pages(mapping, index, - (1 + end - index), false); - did_range_unmap = 1; - } else { - /* - * Just zap this page - */ - unmap_mapping_pages(mapping, index, - 1, false); - } - } + + if (page_mapped(page)) + unmap_mapping_page(page); BUG_ON(page_mapped(page)); + ret2 = do_launder_page(mapping, page); if (ret2 == 0) { if (!invalidate_complete_page2(mapping, page)) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 2917af3f5ac19..68ff19af195c6 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1421,7 +1421,7 @@ static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr, if (eeprom.offset + eeprom.len > total_len) return -EINVAL; - data = kmalloc(PAGE_SIZE, GFP_USER); + data = kzalloc(PAGE_SIZE, GFP_USER); if (!data) return -ENOMEM; @@ -1486,7 +1486,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) return -EINVAL; - data = kmalloc(PAGE_SIZE, GFP_USER); + data = kzalloc(PAGE_SIZE, GFP_USER); if (!data) return -ENOMEM; @@ -1765,7 +1765,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) return -EFAULT; test.len = test_len; - data = kmalloc_array(test_len, sizeof(u64), GFP_USER); + data = kcalloc(test_len, sizeof(u64), GFP_USER); if (!data) return -ENOMEM; @@ -2281,7 +2281,7 @@ static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr) ret = ethtool_tunable_valid(&tuna); if (ret) return ret; - data = kmalloc(tuna.len, GFP_USER); + data = kzalloc(tuna.len, GFP_USER); if (!data) return -ENOMEM; ret = ops->get_tunable(dev, &tuna, data); @@ -2473,7 +2473,7 @@ static int get_phy_tunable(struct net_device *dev, void __user *useraddr) ret = ethtool_phy_tunable_valid(&tuna); if (ret) return ret; - data = kmalloc(tuna.len, 
GFP_USER); + data = kzalloc(tuna.len, GFP_USER); if (!data) return -ENOMEM; if (phy_drv_tunable) { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b7260c8cef2e5..8267349afe231 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -572,7 +572,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, return err; } - if (!inet_sk(sk)->inet_num && inet_autobind(sk)) + if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk)) return -EAGAIN; return sk->sk_prot->connect(sk, uaddr, addr_len); } @@ -799,7 +799,7 @@ int inet_send_prepare(struct sock *sk) sock_rps_record_flow(sk); /* We may need to bind the socket. */ - if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind && + if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind && inet_autobind(sk)) return -EAGAIN; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 123a6d39438f8..7c18597774297 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1989,7 +1989,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) return -EAFNOSUPPORT; if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0) - BUG(); + return -EINVAL; if (tb[IFLA_INET_CONF]) { nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 248856b301c45..8ce8b7300b9d3 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -952,6 +952,7 @@ bool ping_rcv(struct sk_buff *skb) struct sock *sk; struct net *net = dev_net(skb->dev); struct icmphdr *icmph = icmp_hdr(skb); + bool rc = false; /* We assume the packet has already been checked by icmp_rcv */ @@ -966,14 +967,15 @@ bool ping_rcv(struct sk_buff *skb) struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); pr_debug("rcv on socket %p\n", sk); - if (skb2) - ping_queue_rcv_skb(sk, skb2); + if (skb2 && !ping_queue_rcv_skb(sk, skb2)) + rc = true; sock_put(sk); - return true; } - pr_debug("no socket, dropping\n"); - return false; + if (!rc) + pr_debug("no socket, dropping\n"); + + return rc; } EXPORT_SYMBOL_GPL(ping_rcv); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4c881f5d9080c..884d430e23cb3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5799,7 +5799,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) return -EAFNOSUPPORT; if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) - BUG(); + return -EINVAL; if (tb[IFLA_INET6_TOKEN]) { err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN])); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index be40f6b16199f..a83f0c2fcdf77 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1445,7 +1445,7 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata) rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (WARN_ON_ONCE(!chanctx_conf)) { + if (!chanctx_conf) { rcu_read_unlock(); return NULL; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6d3220c66931a..fbe26e912300d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4019,10 +4019,14 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (elems.mbssid_config_ie) bss_conf->profile_periodicity = elems.mbssid_config_ie->profile_periodicity; + else + bss_conf->profile_periodicity = 0; if (elems.ext_capab_len >= 11 && (elems.ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT)) bss_conf->ema_ap = true; + else + bss_conf->ema_ap = false; /* continue assoc process */ ifmgd->assoc_data->timeout = jiffies; @@ 
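Note on the ethtool ioctl hunks above: kmalloc()/kmalloc_array() become kzalloc()/kcalloc() because the driver callbacks may fill fewer bytes than are later copied to userspace; zero-initialising the buffer keeps stale kernel heap contents from leaking. A userspace analogue of the hazard using malloc vs calloc (the fill and copy sizes are invented):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUF_LEN 64

/* Stand-in for a driver callback that fills only part of the buffer. */
static void fill_some(char *buf)
{
	const char msg[] = "short reply";

	memcpy(buf, msg, sizeof(msg));
}

int main(void)
{
	char out[BUF_LEN];
	char *leaky = malloc(BUF_LEN);	 /* like kmalloc(): tail is stale heap */
	char *safe = calloc(1, BUF_LEN); /* like kzalloc(): whole buffer zeroed */

	if (!leaky || !safe)
		return 1;

	fill_some(leaky);
	memcpy(out, leaky, BUF_LEN);	/* bytes past the reply are garbage */

	fill_some(safe);
	memcpy(out, safe, BUF_LEN);	/* unfilled tail is all zeros */
	printf("tail byte after calloc: %d\n", out[BUF_LEN - 1]);

	free(leaky);
	free(safe);
	return 0;
}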
-5749,12 +5753,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, beacon_ies->data, beacon_ies->len); if (elem && elem->datalen >= 3) sdata->vif.bss_conf.profile_periodicity = elem->data[2]; + else + sdata->vif.bss_conf.profile_periodicity = 0; elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY, beacon_ies->data, beacon_ies->len); if (elem && elem->datalen >= 11 && (elem->data[10] & WLAN_EXT_CAPA11_EMA_SUPPORT)) sdata->vif.bss_conf.ema_ap = true; + else + sdata->vif.bss_conf.ema_ap = false; } else { assoc_data->timeout = jiffies; assoc_data->timeout_started = true; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ef8ff0bc66f17..38b5695c2a0c8 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2250,17 +2250,15 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) sc = le16_to_cpu(hdr->seq_ctrl); frag = sc & IEEE80211_SCTL_FRAG; - if (is_multicast_ether_addr(hdr->addr1)) { - I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount); - goto out_no_led; - } - if (rx->sta) cache = &rx->sta->frags; if (likely(!ieee80211_has_morefrags(fc) && frag == 0)) goto out; + if (is_multicast_ether_addr(hdr->addr1)) + return RX_DROP_MONITOR; + I802_DEBUG_INC(rx->local->rx_handlers_fragments); if (skb_linearize(rx->skb)) @@ -2386,7 +2384,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) out: ieee80211_led_rx(rx->local); - out_no_led: if (rx->sta) rx->sta->rx_stats.packets++; return RX_CONTINUE; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d8f9fb0646a4d..fbf56a203c0e8 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -954,7 +954,7 @@ static void ieee80211_parse_extension_element(u32 *crc, switch (elem->data[0]) { case WLAN_EID_EXT_HE_MU_EDCA: - if (len == sizeof(*elems->mu_edca_param_set)) { + if (len >= sizeof(*elems->mu_edca_param_set)) { elems->mu_edca_param_set = data; if (crc) *crc = crc32_be(*crc, (void *)elem, @@ -975,7 +975,7 @@ static void ieee80211_parse_extension_element(u32 *crc, } break; case WLAN_EID_EXT_UORA: - if (len == 1) + if (len >= 1) elems->uora_element = data; break; case WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME: @@ -983,7 +983,7 @@ static void ieee80211_parse_extension_element(u32 *crc, elems->max_channel_switch_time = data; break; case WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION: - if (len == sizeof(*elems->mbssid_config_ie)) + if (len >= sizeof(*elems->mbssid_config_ie)) elems->mbssid_config_ie = data; break; case WLAN_EID_EXT_HE_SPR: @@ -992,7 +992,7 @@ static void ieee80211_parse_extension_element(u32 *crc, elems->he_spr = data; break; case WLAN_EID_EXT_HE_6GHZ_CAPA: - if (len == sizeof(*elems->he_6ghz_capa)) + if (len >= sizeof(*elems->he_6ghz_capa)) elems->he_6ghz_capa = data; break; } @@ -1081,14 +1081,14 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, switch (id) { case WLAN_EID_LINK_ID: - if (elen + 2 != sizeof(struct ieee80211_tdls_lnkie)) { + if (elen + 2 < sizeof(struct ieee80211_tdls_lnkie)) { elem_parse_failed = true; break; } elems->lnk_id = (void *)(pos - 2); break; case WLAN_EID_CHAN_SWITCH_TIMING: - if (elen != sizeof(struct ieee80211_ch_switch_timing)) { + if (elen < sizeof(struct ieee80211_ch_switch_timing)) { elem_parse_failed = true; break; } @@ -1251,7 +1251,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elems->sec_chan_offs = (void *)pos; break; case WLAN_EID_CHAN_SWITCH_PARAM: - if (elen != + if (elen < sizeof(*elems->mesh_chansw_params_ie)) { elem_parse_failed = true; break; @@ -1260,7 +1260,7 @@ _ieee802_11_parse_elems_crc(const u8 
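Note on the mac80211 element parsing changes above and below: exact length checks (elen == sizeof(struct ...)) become minimum length checks (>=), so elements extended by newer senders are still accepted and only the known prefix is consumed, while truncated elements are still rejected. A small sketch of that rule on a generic TLV (the struct layout and sample bytes are made up):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* The fields this version of the parser understands. */
struct demo_caps {
	uint8_t flags;
	uint8_t max_units;
} __attribute__((packed));

/* Parse one element body; newer senders may append fields we ignore. */
static int parse_caps(const uint8_t *data, uint8_t len, struct demo_caps *out)
{
	if (len < sizeof(*out))		/* reject truncated elements only */
		return -1;
	memcpy(out, data, sizeof(*out));/* use just the known prefix */
	return 0;
}

int main(void)
{
	struct demo_caps caps;
	const uint8_t old_sender[] = { 0x01, 0x08 };
	const uint8_t new_sender[] = { 0x01, 0x10, 0xaa, 0xbb }; /* extended */

	printf("old:   %d\n", parse_caps(old_sender, sizeof(old_sender), &caps));
	printf("new:   %d\n", parse_caps(new_sender, sizeof(new_sender), &caps));
	printf("short: %d\n", parse_caps(old_sender, 1, &caps));	/* -1 */
	return 0;
}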
*start, size_t len, bool action, break; case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: if (!action || - elen != sizeof(*elems->wide_bw_chansw_ie)) { + elen < sizeof(*elems->wide_bw_chansw_ie)) { elem_parse_failed = true; break; } @@ -1279,7 +1279,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH, pos, elen); if (ie) { - if (ie[1] == sizeof(*elems->wide_bw_chansw_ie)) + if (ie[1] >= sizeof(*elems->wide_bw_chansw_ie)) elems->wide_bw_chansw_ie = (void *)(ie + 2); else @@ -1323,7 +1323,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elems->cisco_dtpc_elem = pos; break; case WLAN_EID_ADDBA_EXT: - if (elen != sizeof(struct ieee80211_addba_ext_ie)) { + if (elen < sizeof(struct ieee80211_addba_ext_ie)) { elem_parse_failed = true; break; } @@ -1349,7 +1349,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elem, elems); break; case WLAN_EID_S1G_CAPABILITIES: - if (elen == sizeof(*elems->s1g_capab)) + if (elen >= sizeof(*elems->s1g_capab)) elems->s1g_capab = (void *)pos; else elem_parse_failed = true; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ddb68aa836f71..08144559eed56 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2682,7 +2682,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); - proto = po->num; + proto = READ_ONCE(po->num); } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -2895,7 +2895,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); - proto = po->num; + proto = READ_ONCE(po->num); } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -3033,10 +3033,13 @@ static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); - if (po->tx_ring.pg_vec) + /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy. + * tpacket_snd() will redo the check safely. + */ + if (data_race(po->tx_ring.pg_vec)) return tpacket_snd(po, msg); - else - return packet_snd(sock, msg, len); + + return packet_snd(sock, msg, len); } /* @@ -3167,7 +3170,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, /* prevents packet_notifier() from calling * register_prot_hook() */ - po->num = 0; + WRITE_ONCE(po->num, 0); __unregister_prot_hook(sk, true); rcu_read_lock(); dev_curr = po->prot_hook.dev; @@ -3177,17 +3180,17 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, } BUG_ON(po->running); - po->num = proto; + WRITE_ONCE(po->num, proto); po->prot_hook.type = proto; if (unlikely(unlisted)) { dev_put(dev); po->prot_hook.dev = NULL; - po->ifindex = -1; + WRITE_ONCE(po->ifindex, -1); packet_cached_dev_reset(po); } else { po->prot_hook.dev = dev; - po->ifindex = dev ? dev->ifindex : 0; + WRITE_ONCE(po->ifindex, dev ? 
dev->ifindex : 0); packet_cached_dev_assign(po, dev); } } @@ -3501,7 +3504,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, uaddr->sa_family = AF_PACKET; memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); rcu_read_lock(); - dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); + dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); rcu_read_unlock(); @@ -3516,16 +3519,18 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr); + int ifindex; if (peer) return -EOPNOTSUPP; + ifindex = READ_ONCE(po->ifindex); sll->sll_family = AF_PACKET; - sll->sll_ifindex = po->ifindex; - sll->sll_protocol = po->num; + sll->sll_ifindex = ifindex; + sll->sll_protocol = READ_ONCE(po->num); sll->sll_pkttype = 0; rcu_read_lock(); - dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex); + dev = dev_get_by_index_rcu(sock_net(sk), ifindex); if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; @@ -4104,7 +4109,7 @@ static int packet_notifier(struct notifier_block *this, } if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); - po->ifindex = -1; + WRITE_ONCE(po->ifindex, -1); if (po->prot_hook.dev) dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; @@ -4410,7 +4415,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, was_running = po->running; num = po->num; if (was_running) { - po->num = 0; + WRITE_ONCE(po->num, 0); __unregister_prot_hook(sk, false); } spin_unlock(&po->bind_lock); @@ -4445,7 +4450,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, spin_lock(&po->bind_lock); if (was_running) { - po->num = num; + WRITE_ONCE(po->num, num); register_prot_hook(sk); } spin_unlock(&po->bind_lock); @@ -4613,8 +4618,8 @@ static int packet_seq_show(struct seq_file *seq, void *v) s, refcount_read(&s->sk_refcnt), s->sk_type, - ntohs(po->num), - po->ifindex, + ntohs(READ_ONCE(po->num)), + READ_ONCE(po->ifindex), po->running, atomic_read(&s->sk_rmem_alloc), from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), diff --git a/net/wireless/util.c b/net/wireless/util.c index 2731267fd0f9e..4fb8d1b14e76a 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1059,6 +1059,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, case NL80211_IFTYPE_MESH_POINT: /* mesh should be handled? 
*/ break; + case NL80211_IFTYPE_OCB: + cfg80211_leave_ocb(rdev, dev); + break; default: break; } diff --git a/scripts/Makefile b/scripts/Makefile index c36106bce80ee..9adb6d247818f 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -14,6 +14,7 @@ hostprogs-always-$(CONFIG_ASN1) += asn1_compiler hostprogs-always-$(CONFIG_MODULE_SIG_FORMAT) += sign-file hostprogs-always-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += extract-cert hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert +hostprogs-always-$(CONFIG_SYSTEM_REVOCATION_LIST) += extract-cert HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index f9b19524da112..1e9baa5c4fc6e 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -192,15 +192,20 @@ static unsigned int get_symindex(Elf_Sym const *sym, Elf32_Word const *symtab, Elf32_Word const *symtab_shndx) { unsigned long offset; + unsigned short shndx = w2(sym->st_shndx); int index; - if (sym->st_shndx != SHN_XINDEX) - return w2(sym->st_shndx); + if (shndx > SHN_UNDEF && shndx < SHN_LORESERVE) + return shndx; - offset = (unsigned long)sym - (unsigned long)symtab; - index = offset / sizeof(*sym); + if (shndx == SHN_XINDEX) { + offset = (unsigned long)sym - (unsigned long)symtab; + index = offset / sizeof(*sym); - return w(symtab_shndx[index]); + return w(symtab_shndx[index]); + } + + return 0; } static unsigned int get_shnum(Elf_Ehdr const *ehdr, Elf_Shdr const *shdr0) diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c index c5ba695c10e3a..5604bd57c9907 100644 --- a/security/integrity/platform_certs/keyring_handler.c +++ b/security/integrity/platform_certs/keyring_handler.c @@ -55,6 +55,15 @@ static __init void uefi_blacklist_binary(const char *source, uefi_blacklist_hash(source, data, len, "bin:", 4); } +/* + * Add an X509 cert to the revocation list. + */ +static __init void uefi_revocation_list_x509(const char *source, + const void *data, size_t len) +{ + add_key_to_revocation_list(data, len); +} + /* * Return the appropriate handler for particular signature list types found in * the UEFI db and MokListRT tables. 
@@ -76,5 +85,7 @@ __init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type) return uefi_blacklist_x509_tbs; if (efi_guidcmp(*sig_type, efi_cert_sha256_guid) == 0) return uefi_blacklist_binary; + if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0) + return uefi_revocation_list_x509; return 0; } diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c index ee4b4c666854f..f290f78c3f301 100644 --- a/security/integrity/platform_certs/load_uefi.c +++ b/security/integrity/platform_certs/load_uefi.c @@ -132,8 +132,9 @@ static int __init load_moklist_certs(void) static int __init load_uefi_certs(void) { efi_guid_t secure_var = EFI_IMAGE_SECURITY_DATABASE_GUID; - void *db = NULL, *dbx = NULL; - unsigned long dbsize = 0, dbxsize = 0; + efi_guid_t mok_var = EFI_SHIM_LOCK_GUID; + void *db = NULL, *dbx = NULL, *mokx = NULL; + unsigned long dbsize = 0, dbxsize = 0, mokxsize = 0; efi_status_t status; int rc = 0; @@ -175,6 +176,21 @@ static int __init load_uefi_certs(void) kfree(dbx); } + mokx = get_cert_list(L"MokListXRT", &mok_var, &mokxsize, &status); + if (!mokx) { + if (status == EFI_NOT_FOUND) + pr_debug("mokx variable wasn't found\n"); + else + pr_info("Couldn't get mokx list\n"); + } else { + rc = parse_efi_signature_list("UEFI:MokListXRT", + mokx, mokxsize, + get_handler_for_dbx); + if (rc) + pr_err("Couldn't parse mokx signatures %d\n", rc); + kfree(mokx); + } + /* Load the MokListRT certs */ rc = load_moklist_certs(); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 126c6727a6b09..49805fd16fdf5 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -55,7 +55,7 @@ int kvm_check_cap(long cap) exit(KSFT_SKIP); ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap); - TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n" + TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n" " rc: %i errno: %i", ret, errno); close(kvm_fd); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f446c36f58003..1353439691cf7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1883,6 +1883,13 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault) return true; } +static int kvm_try_get_pfn(kvm_pfn_t pfn) +{ + if (kvm_is_reserved_pfn(pfn)) + return 1; + return get_page_unless_zero(pfn_to_page(pfn)); +} + static int hva_to_pfn_remapped(struct vm_area_struct *vma, unsigned long addr, bool *async, bool write_fault, bool *writable, @@ -1932,13 +1939,21 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, * Whoever called remap_pfn_range is also going to call e.g. * unmap_mapping_range before the underlying pages are freed, * causing a call to our MMU notifier. + * + * Certain IO or PFNMAP mappings can be backed with valid + * struct pages, but be allocated without refcounting e.g., + * tail pages of non-compound higher order allocations, which + * would then underflow the refcount when the caller does the + * required put_page. Don't allow those pages here. */ - kvm_get_pfn(pfn); + if (!kvm_try_get_pfn(pfn)) + r = -EFAULT; out: pte_unmap_unlock(ptep, ptl); *p_pfn = pfn; - return 0; + + return r; } /*