diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 9000640f7f7a0..d9fce65b2f047 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -63,6 +63,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #1902691 | ARM64_ERRATUM_1902691 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A520 | #2966298 | ARM64_ERRATUM_2966298 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 | diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 3301288a7c692..f5f7a464605f9 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2148,6 +2148,14 @@ accept_ra_min_hop_limit - INTEGER Default: 1 +accept_ra_min_lft - INTEGER + Minimum acceptable lifetime value in Router Advertisement. + + RA sections with a lifetime less than this value shall be + ignored. Zero lifetimes stay unaffected. + + Default: 0 + accept_ra_pinfo - BOOLEAN Learn Prefix Information in Router Advertisement. diff --git a/Makefile b/Makefile index 9ceda3dad5eb7..b435b56594f0f 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 56 +SUBLEVEL = 57 EXTRAVERSION = NAME = Curry Ramen diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index d5eb2fbab473e..9ee9e17eb2ca0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -983,6 +983,19 @@ config ARM64_ERRATUM_2457168 If unsure, say Y. +config ARM64_ERRATUM_2966298 + bool "Cortex-A520: 2966298: workaround for speculatively executed unprivileged load" + default y + help + This option adds the workaround for ARM Cortex-A520 erratum 2966298. + + On an affected Cortex-A520 core, a speculatively executed unprivileged + load might leak data from a privileged level via a cache side channel. + + Work around this problem by executing a TLBI before returning to EL0. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index f73f11b550425..a0badda3a8d1c 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -670,7 +670,7 @@ static inline bool supports_clearbhb(int scope) isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); return cpuid_feature_extract_unsigned_field(isar2, - ID_AA64ISAR2_EL1_BC_SHIFT); + ID_AA64ISAR2_EL1_CLRBHB_SHIFT); } const struct cpumask *system_32bit_el0_cpumask(void); @@ -863,7 +863,11 @@ static inline bool cpu_has_hw_af(void) if (!IS_ENABLED(CONFIG_ARM64_HW_AFDBM)) return false; - mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); + /* + * Use cached version to avoid emulated msr operation on KVM + * guests. 
+ */ + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); return cpuid_feature_extract_unsigned_field(mmfr1, ID_AA64MMFR1_EL1_HAFDBS_SHIFT); } diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 65e53ef5a3960..357932938b5ab 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -79,6 +79,7 @@ #define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 +#define ARM_CPU_PART_CORTEX_A520 0xD80 #define ARM_CPU_PART_CORTEX_A710 0xD47 #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 @@ -141,6 +142,7 @@ #define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) +#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 8dbf3c21ea22a..3f917124684c5 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -723,6 +723,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .cpu_enable = cpu_clear_bf16_from_user_emulation, }, #endif +#ifdef CONFIG_ARM64_ERRATUM_2966298 + { + .desc = "ARM erratum 2966298", + .capability = ARM64_WORKAROUND_2966298, + /* Cortex-A520 r0p0 - r0p1 */ + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1), + }, +#endif #ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38 { .desc = "AmpereOne erratum AC03_CPU_38", diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b3eb53847c96b..770a31c6ed81b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -212,7 +212,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CLRBHB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index beb4db21c89c1..de16fa917e1b8 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -419,6 +419,10 @@ alternative_else_nop_endif ldp x28, x29, [sp, #16 * 14] .if \el == 0 +alternative_if ARM64_WORKAROUND_2966298 + tlbi vale1, xzr + dsb nsh +alternative_else_nop_endif alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 ldr lr, [sp, #S_LR] add sp, sp, #PT_REGS_SIZE // restore sp diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 14d31d1b2ff02..e73830d9f1367 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -71,6 +71,7 @@ WORKAROUND_2064142 WORKAROUND_2077057 WORKAROUND_2457168 WORKAROUND_2658417 +WORKAROUND_2966298 WORKAROUND_AMPERE_AC03_CPU_38 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE diff --git a/arch/arm64/tools/sysreg 
b/arch/arm64/tools/sysreg index 384757a7eda9e..11c3f7a7cec7b 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -484,7 +484,11 @@ EndEnum EndSysreg Sysreg ID_AA64ISAR2_EL1 3 0 0 6 2 -Res0 63:28 +Res0 63:32 +Enum 31:28 CLRBHB + 0b0000 NI + 0b0001 IMP +EndEnum Enum 27:24 PAC_frac 0b0000 NI 0b0001 IMP diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index 6d28b5514699a..10a061d6899cd 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -2,14 +2,28 @@ #ifndef __PARISC_LDCW_H #define __PARISC_LDCW_H -#ifndef CONFIG_PA20 /* Because kmalloc only guarantees 8-byte alignment for kmalloc'd data, and GCC only guarantees 8-byte alignment for stack locals, we can't be assured of 16-byte alignment for atomic lock data even if we specify "__attribute ((aligned(16)))" in the type declaration. So, we use a struct containing an array of four ints for the atomic lock type and dynamically select the 16-byte aligned int from the array - for the semaphore. */ + for the semaphore. */ + +/* From: "Jim Hull" + I've attached a summary of the change, but basically, for PA 2.0, as + long as the ",CO" (coherent operation) completer is implemented, then the + 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead + they only require "natural" alignment (4-byte for ldcw, 8-byte for + ldcd). + + Although the cache control hint is accepted by all PA 2.0 processors, + it is only implemented on PA8800/PA8900 CPUs. Prior PA8X00 CPUs still + require 16-byte alignment. If the address is unaligned, the operation + of the instruction is undefined. The ldcw instruction does not generate + unaligned data reference traps so misaligned accesses are not detected. + This hid the problem for years. So, restore the 16-byte alignment dropped + by Kyle McMartin in "Remove __ldcw_align for PA-RISC 2.0 processors". */ #define __PA_LDCW_ALIGNMENT 16 #define __PA_LDCW_ALIGN_ORDER 4 @@ -19,22 +33,12 @@ & ~(__PA_LDCW_ALIGNMENT - 1); \ (volatile unsigned int *) __ret; \ }) -#define __LDCW "ldcw" -#else /*CONFIG_PA20*/ -/* From: "Jim Hull" - I've attached a summary of the change, but basically, for PA 2.0, as - long as the ",CO" (coherent operation) completer is specified, then the - 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead - they only require "natural" alignment (4-byte for ldcw, 8-byte for - ldcd). */ - -#define __PA_LDCW_ALIGNMENT 4 -#define __PA_LDCW_ALIGN_ORDER 2 -#define __ldcw_align(a) (&(a)->slock) +#ifdef CONFIG_PA20 #define __LDCW "ldcw,co" - -#endif /*!CONFIG_PA20*/ +#else +#define __LDCW "ldcw" +#endif /* LDCW, the only atomic read-write operation PA-RISC has. *sigh*. 
We don't explicitly expose that "*a" may be written as reload diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h index ca39ee350c3f4..35c5086b74d70 100644 --- a/arch/parisc/include/asm/spinlock_types.h +++ b/arch/parisc/include/asm/spinlock_types.h @@ -3,13 +3,8 @@ #define __ASM_SPINLOCK_TYPES_H typedef struct { -#ifdef CONFIG_PA20 - volatile unsigned int slock; -# define __ARCH_SPIN_LOCK_UNLOCKED { 1 } -#else volatile unsigned int lock[4]; # define __ARCH_SPIN_LOCK_UNLOCKED { { 1, 1, 1, 1 } } -#endif } arch_spinlock_t; diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 7dbd92cafae38..e37ec05487308 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -443,7 +443,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) if (cpu_online(cpu)) return 0; - if (num_online_cpus() < setup_max_cpus && smp_boot_one_cpu(cpu, tidle)) + if (num_online_cpus() < nr_cpu_ids && + num_online_cpus() < setup_max_cpus && + smp_boot_one_cpu(cpu, tidle)) return -EIO; return cpu_online(cpu) ? 0 : -EIO; diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 6672a3f05fc68..04f4b96dec6df 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -534,8 +534,12 @@ static void amd_pmu_cpu_reset(int cpu) /* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */ wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0); - /* Clear overflow bits i.e. PerfCntrGLobalStatus.PerfCntrOvfl */ - wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, amd_pmu_global_cntr_mask); + /* + * Clear freeze and overflow bits i.e. PerfCntrGLobalStatus.LbrFreeze + * and PerfCntrGLobalStatus.PerfCntrOvfl + */ + wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, + GLOBAL_STATUS_LBRS_FROZEN | amd_pmu_global_cntr_mask); } static int amd_pmu_cpu_prepare(int cpu) @@ -570,6 +574,7 @@ static void amd_pmu_cpu_starting(int cpu) int i, nb_id; cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; + amd_pmu_cpu_reset(cpu); if (!x86_pmu.amd_nb_constraints) return; @@ -591,8 +596,6 @@ static void amd_pmu_cpu_starting(int cpu) cpuc->amd_nb->nb_id = nb_id; cpuc->amd_nb->refcnt++; - - amd_pmu_cpu_reset(cpu); } static void amd_pmu_cpu_dead(int cpu) @@ -601,6 +604,7 @@ static void amd_pmu_cpu_dead(int cpu) kfree(cpuhw->lbr_sel); cpuhw->lbr_sel = NULL; + amd_pmu_cpu_reset(cpu); if (!x86_pmu.amd_nb_constraints) return; @@ -613,8 +617,6 @@ static void amd_pmu_cpu_dead(int cpu) cpuhw->amd_nb = NULL; } - - amd_pmu_cpu_reset(cpu); } static inline void amd_pmu_set_global_ctl(u64 ctl) @@ -884,7 +886,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) struct hw_perf_event *hwc; struct perf_event *event; int handled = 0, idx; - u64 status, mask; + u64 reserved, status, mask; bool pmu_enabled; /* @@ -909,6 +911,14 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) status &= ~GLOBAL_STATUS_LBRS_FROZEN; } + reserved = status & ~amd_pmu_global_cntr_mask; + if (reserved) + pr_warn_once("Reserved PerfCntrGlobalStatus bits are set (0x%llx), please consider updating microcode\n", + reserved); + + /* Clear any reserved bits set by buggy microcode */ + status &= amd_pmu_global_cntr_mask; + for (idx = 0; idx < x86_pmu.num_counters; idx++) { if (!test_bit(idx, cpuc->active_mask)) continue; diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 3a5b0c9c4fccc..7dce812ce2538 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -253,7 +253,7 @@ static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg) return 0; } -static int 
sev_cpuid_hv(struct cpuid_leaf *leaf) +static int __sev_cpuid_hv_msr(struct cpuid_leaf *leaf) { int ret; @@ -276,6 +276,45 @@ static int sev_cpuid_hv(struct cpuid_leaf *leaf) return ret; } +static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) +{ + u32 cr4 = native_read_cr4(); + int ret; + + ghcb_set_rax(ghcb, leaf->fn); + ghcb_set_rcx(ghcb, leaf->subfn); + + if (cr4 & X86_CR4_OSXSAVE) + /* Safe to read xcr0 */ + ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK)); + else + /* xgetbv will cause #UD - use reset value for xcr0 */ + ghcb_set_xcr0(ghcb, 1); + + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0); + if (ret != ES_OK) + return ret; + + if (!(ghcb_rax_is_valid(ghcb) && + ghcb_rbx_is_valid(ghcb) && + ghcb_rcx_is_valid(ghcb) && + ghcb_rdx_is_valid(ghcb))) + return ES_VMM_ERROR; + + leaf->eax = ghcb->save.rax; + leaf->ebx = ghcb->save.rbx; + leaf->ecx = ghcb->save.rcx; + leaf->edx = ghcb->save.rdx; + + return ES_OK; +} + +static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) +{ + return ghcb ? __sev_cpuid_hv_ghcb(ghcb, ctxt, leaf) + : __sev_cpuid_hv_msr(leaf); +} + /* * This may be called early while still running on the initial identity * mapping. Use RIP-relative addressing to obtain the correct address @@ -385,19 +424,20 @@ snp_cpuid_get_validated_func(struct cpuid_leaf *leaf) return false; } -static void snp_cpuid_hv(struct cpuid_leaf *leaf) +static void snp_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) { - if (sev_cpuid_hv(leaf)) + if (sev_cpuid_hv(ghcb, ctxt, leaf)) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV); } -static int snp_cpuid_postprocess(struct cpuid_leaf *leaf) +static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt, + struct cpuid_leaf *leaf) { struct cpuid_leaf leaf_hv = *leaf; switch (leaf->fn) { case 0x1: - snp_cpuid_hv(&leaf_hv); + snp_cpuid_hv(ghcb, ctxt, &leaf_hv); /* initial APIC ID */ leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0)); @@ -416,7 +456,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf) break; case 0xB: leaf_hv.subfn = 0; - snp_cpuid_hv(&leaf_hv); + snp_cpuid_hv(ghcb, ctxt, &leaf_hv); /* extended APIC ID */ leaf->edx = leaf_hv.edx; @@ -464,7 +504,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf) } break; case 0x8000001E: - snp_cpuid_hv(&leaf_hv); + snp_cpuid_hv(ghcb, ctxt, &leaf_hv); /* extended APIC ID */ leaf->eax = leaf_hv.eax; @@ -485,7 +525,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf) * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value * should be treated as fatal by caller. 
*/ -static int snp_cpuid(struct cpuid_leaf *leaf) +static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -519,7 +559,7 @@ static int snp_cpuid(struct cpuid_leaf *leaf) return 0; } - return snp_cpuid_postprocess(leaf); + return snp_cpuid_postprocess(ghcb, ctxt, leaf); } /* @@ -541,14 +581,14 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) leaf.fn = fn; leaf.subfn = subfn; - ret = snp_cpuid(&leaf); + ret = snp_cpuid(NULL, NULL, &leaf); if (!ret) goto cpuid_done; if (ret != -EOPNOTSUPP) goto fail; - if (sev_cpuid_hv(&leaf)) + if (__sev_cpuid_hv_msr(&leaf)) goto fail; cpuid_done: @@ -845,14 +885,15 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) return ret; } -static int vc_handle_cpuid_snp(struct pt_regs *regs) +static int vc_handle_cpuid_snp(struct ghcb *ghcb, struct es_em_ctxt *ctxt) { + struct pt_regs *regs = ctxt->regs; struct cpuid_leaf leaf; int ret; leaf.fn = regs->ax; leaf.subfn = regs->cx; - ret = snp_cpuid(&leaf); + ret = snp_cpuid(ghcb, ctxt, &leaf); if (!ret) { regs->ax = leaf.eax; regs->bx = leaf.ebx; @@ -871,7 +912,7 @@ static enum es_result vc_handle_cpuid(struct ghcb *ghcb, enum es_result ret; int snp_cpuid_ret; - snp_cpuid_ret = vc_handle_cpuid_snp(regs); + snp_cpuid_ret = vc_handle_cpuid_snp(ghcb, ctxt); if (!snp_cpuid_ret) return ES_OK; if (snp_cpuid_ret != -EOPNOTSUPP) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index a582ea0da74f5..a82bdec923b21 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -737,6 +737,7 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head) struct request_queue *q = container_of(rcu_head, struct request_queue, rcu_head); + percpu_ref_exit(&q->q_usage_counter); kmem_cache_free(blk_get_queue_kmem_cache(blk_queue_has_srcu(q)), q); } @@ -762,8 +763,6 @@ static void blk_release_queue(struct kobject *kobj) might_sleep(); - percpu_ref_exit(&q->q_usage_counter); - if (q->poll_stat) blk_stat_remove_callback(q, q->poll_cb); blk_stat_free_callback(q->poll_cb); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 25b9bdf2fc380..6a053cd0cf410 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5022,11 +5022,27 @@ static const unsigned int ata_port_suspend_ehi = ATA_EHI_QUIET static void ata_port_suspend(struct ata_port *ap, pm_message_t mesg) { + /* + * We are about to suspend the port, so we do not care about + * scsi_rescan_device() calls scheduled by previous resume operations. + * The next resume will schedule the rescan again. So cancel any rescan + * that is not done yet. + */ + cancel_delayed_work_sync(&ap->scsi_rescan_task); + ata_port_request_pm(ap, mesg, 0, ata_port_suspend_ehi, false); } static void ata_port_suspend_async(struct ata_port *ap, pm_message_t mesg) { + /* + * We are about to suspend the port, so we do not care about + * scsi_rescan_device() calls scheduled by previous resume operations. + * The next resume will schedule the rescan again. So cancel any rescan + * that is not done yet. 
+ */ + cancel_delayed_work_sync(&ap->scsi_rescan_task); + ata_port_request_pm(ap, mesg, 0, ata_port_suspend_ehi, true); } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index d28628b964e29..7b9c9264b9a72 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1081,7 +1081,15 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) } } else { sdev->sector_size = ata_id_logical_sector_size(dev->id); - sdev->manage_start_stop = 1; + /* + * Stop the drive on suspend but do not issue START STOP UNIT + * on resume as this is not necessary and may fail: the device + * will be woken up by ata_port_pm_resume() with a port reset + * and device revalidation. + */ + sdev->manage_system_start_stop = true; + sdev->manage_runtime_start_stop = true; + sdev->no_start_on_resume = 1; } /* @@ -4640,7 +4648,7 @@ void ata_scsi_dev_rescan(struct work_struct *work) struct ata_link *link; struct ata_device *dev; unsigned long flags; - bool delay_rescan = false; + int ret = 0; mutex_lock(&ap->scsi_scan_mutex); spin_lock_irqsave(ap->lock, flags); @@ -4649,37 +4657,34 @@ void ata_scsi_dev_rescan(struct work_struct *work) ata_for_each_dev(dev, link, ENABLED) { struct scsi_device *sdev = dev->sdev; + /* + * If the port was suspended before this was scheduled, + * bail out. + */ + if (ap->pflags & ATA_PFLAG_SUSPENDED) + goto unlock; + if (!sdev) continue; if (scsi_device_get(sdev)) continue; - /* - * If the rescan work was scheduled because of a resume - * event, the port is already fully resumed, but the - * SCSI device may not yet be fully resumed. In such - * case, executing scsi_rescan_device() may cause a - * deadlock with the PM code on device_lock(). Prevent - * this by giving up and retrying rescan after a short - * delay. - */ - delay_rescan = sdev->sdev_gendev.power.is_suspended; - if (delay_rescan) { - scsi_device_put(sdev); - break; - } - spin_unlock_irqrestore(ap->lock, flags); - scsi_rescan_device(&(sdev->sdev_gendev)); + ret = scsi_rescan_device(sdev); scsi_device_put(sdev); spin_lock_irqsave(ap->lock, flags); + + if (ret) + goto unlock; } } +unlock: spin_unlock_irqrestore(ap->lock, flags); mutex_unlock(&ap->scsi_scan_mutex); - if (delay_rescan) + /* Reschedule with a delay if scsi_rescan_device() returned an error */ + if (ret) schedule_delayed_work(&ap->scsi_rescan_task, msecs_to_jiffies(5)); } diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index ae6b8788d5f3f..d65715b9e129e 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -453,7 +453,8 @@ static int regcache_rbtree_write(struct regmap *map, unsigned int reg, if (!rbnode) return -ENOMEM; regcache_rbtree_set_register(map, rbnode, - reg - rbnode->base_reg, value); + (reg - rbnode->base_reg) / map->reg_stride, + value); regcache_rbtree_insert(map, &rbtree_ctx->root, rbnode); rbtree_ctx->cached_rbnode = rbnode; } diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 74ef3da545361..afc92869cba42 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -632,9 +632,8 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) 
static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); static int rbd_dev_refresh(struct rbd_device *rbd_dev); -static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev); -static int rbd_dev_header_info(struct rbd_device *rbd_dev); -static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev); +static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev, + struct rbd_image_header *header); static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u64 snap_id); static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, @@ -995,15 +994,24 @@ static void rbd_init_layout(struct rbd_device *rbd_dev) RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL); } +static void rbd_image_header_cleanup(struct rbd_image_header *header) +{ + kfree(header->object_prefix); + ceph_put_snap_context(header->snapc); + kfree(header->snap_sizes); + kfree(header->snap_names); + + memset(header, 0, sizeof(*header)); +} + /* * Fill an rbd image header with information from the given format 1 * on-disk header. */ -static int rbd_header_from_disk(struct rbd_device *rbd_dev, - struct rbd_image_header_ondisk *ondisk) +static int rbd_header_from_disk(struct rbd_image_header *header, + struct rbd_image_header_ondisk *ondisk, + bool first_time) { - struct rbd_image_header *header = &rbd_dev->header; - bool first_time = header->object_prefix == NULL; struct ceph_snap_context *snapc; char *object_prefix = NULL; char *snap_names = NULL; @@ -1070,11 +1078,6 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev, if (first_time) { header->object_prefix = object_prefix; header->obj_order = ondisk->options.order; - rbd_init_layout(rbd_dev); - } else { - ceph_put_snap_context(header->snapc); - kfree(header->snap_names); - kfree(header->snap_sizes); } /* The remaining fields always get updated (when we refresh) */ @@ -4860,7 +4863,9 @@ out_req: * return, the rbd_dev->header field will contain up-to-date * information about the image. */ -static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev) +static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev, + struct rbd_image_header *header, + bool first_time) { struct rbd_image_header_ondisk *ondisk = NULL; u32 snap_count = 0; @@ -4908,7 +4913,7 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev) snap_count = le32_to_cpu(ondisk->snap_count); } while (snap_count != want_count); - ret = rbd_header_from_disk(rbd_dev, ondisk); + ret = rbd_header_from_disk(header, ondisk, first_time); out: kfree(ondisk); @@ -4932,39 +4937,6 @@ static void rbd_dev_update_size(struct rbd_device *rbd_dev) } } -static int rbd_dev_refresh(struct rbd_device *rbd_dev) -{ - u64 mapping_size; - int ret; - - down_write(&rbd_dev->header_rwsem); - mapping_size = rbd_dev->mapping.size; - - ret = rbd_dev_header_info(rbd_dev); - if (ret) - goto out; - - /* - * If there is a parent, see if it has disappeared due to the - * mapped image getting flattened. 
- */ - if (rbd_dev->parent) { - ret = rbd_dev_v2_parent_info(rbd_dev); - if (ret) - goto out; - } - - rbd_assert(!rbd_is_snap(rbd_dev)); - rbd_dev->mapping.size = rbd_dev->header.image_size; - -out: - up_write(&rbd_dev->header_rwsem); - if (!ret && mapping_size != rbd_dev->mapping.size) - rbd_dev_update_size(rbd_dev); - - return ret; -} - static const struct blk_mq_ops rbd_mq_ops = { .queue_rq = rbd_queue_rq, }; @@ -5504,17 +5476,12 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, return 0; } -static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev) -{ - return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP, - &rbd_dev->header.obj_order, - &rbd_dev->header.image_size); -} - -static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) +static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev, + char **pobject_prefix) { size_t size; void *reply_buf; + char *object_prefix; int ret; void *p; @@ -5532,16 +5499,16 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) goto out; p = reply_buf; - rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, - p + ret, NULL, GFP_NOIO); + object_prefix = ceph_extract_encoded_string(&p, p + ret, NULL, + GFP_NOIO); + if (IS_ERR(object_prefix)) { + ret = PTR_ERR(object_prefix); + goto out; + } ret = 0; - if (IS_ERR(rbd_dev->header.object_prefix)) { - ret = PTR_ERR(rbd_dev->header.object_prefix); - rbd_dev->header.object_prefix = NULL; - } else { - dout(" object_prefix = %s\n", rbd_dev->header.object_prefix); - } + *pobject_prefix = object_prefix; + dout(" object_prefix = %s\n", object_prefix); out: kfree(reply_buf); @@ -5592,13 +5559,6 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, return 0; } -static int rbd_dev_v2_features(struct rbd_device *rbd_dev) -{ - return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, - rbd_is_ro(rbd_dev), - &rbd_dev->header.features); -} - /* * These are generic image flags, but since they are used only for * object map, store them in rbd_dev->object_map_flags. @@ -5635,6 +5595,14 @@ struct parent_image_info { u64 overlap; }; +static void rbd_parent_info_cleanup(struct parent_image_info *pii) +{ + kfree(pii->pool_ns); + kfree(pii->image_id); + + memset(pii, 0, sizeof(*pii)); +} + /* * The caller is responsible for @pii. 
*/ @@ -5704,6 +5672,9 @@ static int __get_parent_info(struct rbd_device *rbd_dev, if (pii->has_overlap) ceph_decode_64_safe(&p, end, pii->overlap, e_inval); + dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n", + __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id, + pii->has_overlap, pii->overlap); return 0; e_inval: @@ -5742,14 +5713,17 @@ static int __get_parent_info_legacy(struct rbd_device *rbd_dev, pii->has_overlap = true; ceph_decode_64_safe(&p, end, pii->overlap, e_inval); + dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n", + __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id, + pii->has_overlap, pii->overlap); return 0; e_inval: return -EINVAL; } -static int get_parent_info(struct rbd_device *rbd_dev, - struct parent_image_info *pii) +static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev, + struct parent_image_info *pii) { struct page *req_page, *reply_page; void *p; @@ -5777,7 +5751,7 @@ static int get_parent_info(struct rbd_device *rbd_dev, return ret; } -static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) +static int rbd_dev_setup_parent(struct rbd_device *rbd_dev) { struct rbd_spec *parent_spec; struct parent_image_info pii = { 0 }; @@ -5787,37 +5761,12 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) if (!parent_spec) return -ENOMEM; - ret = get_parent_info(rbd_dev, &pii); + ret = rbd_dev_v2_parent_info(rbd_dev, &pii); if (ret) goto out_err; - dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n", - __func__, pii.pool_id, pii.pool_ns, pii.image_id, pii.snap_id, - pii.has_overlap, pii.overlap); - - if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) { - /* - * Either the parent never existed, or we have - * record of it but the image got flattened so it no - * longer has a parent. When the parent of a - * layered image disappears we immediately set the - * overlap to 0. The effect of this is that all new - * requests will be treated as if the image had no - * parent. - * - * If !pii.has_overlap, the parent image spec is not - * applicable. It's there to avoid duplication in each - * snapshot record. - */ - if (rbd_dev->parent_overlap) { - rbd_dev->parent_overlap = 0; - rbd_dev_parent_put(rbd_dev); - pr_info("%s: clone image has been flattened\n", - rbd_dev->disk->disk_name); - } - + if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) goto out; /* No parent? No problem. */ - } /* The ceph file layout needs to fit pool id in 32 bits */ @@ -5829,58 +5778,46 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) } /* - * The parent won't change (except when the clone is - * flattened, already handled that). So we only need to - * record the parent spec we have not already done so. + * The parent won't change except when the clone is flattened, + * so we only need to record the parent image spec once. 
*/ - if (!rbd_dev->parent_spec) { - parent_spec->pool_id = pii.pool_id; - if (pii.pool_ns && *pii.pool_ns) { - parent_spec->pool_ns = pii.pool_ns; - pii.pool_ns = NULL; - } - parent_spec->image_id = pii.image_id; - pii.image_id = NULL; - parent_spec->snap_id = pii.snap_id; - - rbd_dev->parent_spec = parent_spec; - parent_spec = NULL; /* rbd_dev now owns this */ + parent_spec->pool_id = pii.pool_id; + if (pii.pool_ns && *pii.pool_ns) { + parent_spec->pool_ns = pii.pool_ns; + pii.pool_ns = NULL; } + parent_spec->image_id = pii.image_id; + pii.image_id = NULL; + parent_spec->snap_id = pii.snap_id; + + rbd_assert(!rbd_dev->parent_spec); + rbd_dev->parent_spec = parent_spec; + parent_spec = NULL; /* rbd_dev now owns this */ /* - * We always update the parent overlap. If it's zero we issue - * a warning, as we will proceed as if there was no parent. + * Record the parent overlap. If it's zero, issue a warning as + * we will proceed as if there is no parent. */ - if (!pii.overlap) { - if (parent_spec) { - /* refresh, careful to warn just once */ - if (rbd_dev->parent_overlap) - rbd_warn(rbd_dev, - "clone now standalone (overlap became 0)"); - } else { - /* initial probe */ - rbd_warn(rbd_dev, "clone is standalone (overlap 0)"); - } - } + if (!pii.overlap) + rbd_warn(rbd_dev, "clone is standalone (overlap 0)"); rbd_dev->parent_overlap = pii.overlap; out: ret = 0; out_err: - kfree(pii.pool_ns); - kfree(pii.image_id); + rbd_parent_info_cleanup(&pii); rbd_spec_put(parent_spec); return ret; } -static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) +static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev, + u64 *stripe_unit, u64 *stripe_count) { struct { __le64 stripe_unit; __le64 stripe_count; } __attribute__ ((packed)) striping_info_buf = { 0 }; size_t size = sizeof (striping_info_buf); - void *p; int ret; ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, @@ -5892,27 +5829,33 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) if (ret < size) return -ERANGE; - p = &striping_info_buf; - rbd_dev->header.stripe_unit = ceph_decode_64(&p); - rbd_dev->header.stripe_count = ceph_decode_64(&p); + *stripe_unit = le64_to_cpu(striping_info_buf.stripe_unit); + *stripe_count = le64_to_cpu(striping_info_buf.stripe_count); + dout(" stripe_unit = %llu stripe_count = %llu\n", *stripe_unit, + *stripe_count); + return 0; } -static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev) +static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev, s64 *data_pool_id) { - __le64 data_pool_id; + __le64 data_pool_buf; int ret; ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, &rbd_dev->header_oloc, "get_data_pool", - NULL, 0, &data_pool_id, sizeof(data_pool_id)); + NULL, 0, &data_pool_buf, + sizeof(data_pool_buf)); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; - if (ret < sizeof(data_pool_id)) + if (ret < sizeof(data_pool_buf)) return -EBADMSG; - rbd_dev->header.data_pool_id = le64_to_cpu(data_pool_id); - WARN_ON(rbd_dev->header.data_pool_id == CEPH_NOPOOL); + *data_pool_id = le64_to_cpu(data_pool_buf); + dout(" data_pool_id = %lld\n", *data_pool_id); + WARN_ON(*data_pool_id == CEPH_NOPOOL); + return 0; } @@ -6104,7 +6047,8 @@ out_err: return ret; } -static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev) +static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, + struct ceph_snap_context **psnapc) { size_t size; int ret; @@ -6165,9 +6109,7 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev) for (i = 0; i < 
snap_count; i++) snapc->snaps[i] = ceph_decode_64(&p); - ceph_put_snap_context(rbd_dev->header.snapc); - rbd_dev->header.snapc = snapc; - + *psnapc = snapc; dout(" snap context seq = %llu, snap_count = %u\n", (unsigned long long)seq, (unsigned int)snap_count); out: @@ -6216,38 +6158,42 @@ out: return snap_name; } -static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev) +static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev, + struct rbd_image_header *header, + bool first_time) { - bool first_time = rbd_dev->header.object_prefix == NULL; int ret; - ret = rbd_dev_v2_image_size(rbd_dev); + ret = _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP, + first_time ? &header->obj_order : NULL, + &header->image_size); if (ret) return ret; if (first_time) { - ret = rbd_dev_v2_header_onetime(rbd_dev); + ret = rbd_dev_v2_header_onetime(rbd_dev, header); if (ret) return ret; } - ret = rbd_dev_v2_snap_context(rbd_dev); - if (ret && first_time) { - kfree(rbd_dev->header.object_prefix); - rbd_dev->header.object_prefix = NULL; - } + ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc); + if (ret) + return ret; - return ret; + return 0; } -static int rbd_dev_header_info(struct rbd_device *rbd_dev) +static int rbd_dev_header_info(struct rbd_device *rbd_dev, + struct rbd_image_header *header, + bool first_time) { rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + rbd_assert(!header->object_prefix && !header->snapc); if (rbd_dev->image_format == 1) - return rbd_dev_v1_header_info(rbd_dev); + return rbd_dev_v1_header_info(rbd_dev, header, first_time); - return rbd_dev_v2_header_info(rbd_dev); + return rbd_dev_v2_header_info(rbd_dev, header, first_time); } /* @@ -6735,60 +6681,49 @@ out: */ static void rbd_dev_unprobe(struct rbd_device *rbd_dev) { - struct rbd_image_header *header; - rbd_dev_parent_put(rbd_dev); rbd_object_map_free(rbd_dev); rbd_dev_mapping_clear(rbd_dev); /* Free dynamic fields from the header, then zero it out */ - header = &rbd_dev->header; - ceph_put_snap_context(header->snapc); - kfree(header->snap_sizes); - kfree(header->snap_names); - kfree(header->object_prefix); - memset(header, 0, sizeof (*header)); + rbd_image_header_cleanup(&rbd_dev->header); } -static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev) +static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev, + struct rbd_image_header *header) { int ret; - ret = rbd_dev_v2_object_prefix(rbd_dev); + ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix); if (ret) - goto out_err; + return ret; /* * Get the and check features for the image. Currently the * features are assumed to never change. 
*/ - ret = rbd_dev_v2_features(rbd_dev); + ret = _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, + rbd_is_ro(rbd_dev), &header->features); if (ret) - goto out_err; + return ret; /* If the image supports fancy striping, get its parameters */ - if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) { - ret = rbd_dev_v2_striping_info(rbd_dev); - if (ret < 0) - goto out_err; + if (header->features & RBD_FEATURE_STRIPINGV2) { + ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit, + &header->stripe_count); + if (ret) + return ret; } - if (rbd_dev->header.features & RBD_FEATURE_DATA_POOL) { - ret = rbd_dev_v2_data_pool(rbd_dev); + if (header->features & RBD_FEATURE_DATA_POOL) { + ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id); if (ret) - goto out_err; + return ret; } - rbd_init_layout(rbd_dev); return 0; - -out_err: - rbd_dev->header.features = 0; - kfree(rbd_dev->header.object_prefix); - rbd_dev->header.object_prefix = NULL; - return ret; } /* @@ -6983,13 +6918,15 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) if (!depth) down_write(&rbd_dev->header_rwsem); - ret = rbd_dev_header_info(rbd_dev); + ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true); if (ret) { if (ret == -ENOENT && !need_watch) rbd_print_dne(rbd_dev, false); goto err_out_probe; } + rbd_init_layout(rbd_dev); + /* * If this image is the one being mapped, we have pool name and * id, image name and id, and snap name - need to fill snap id. @@ -7018,7 +6955,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) } if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { - ret = rbd_dev_v2_parent_info(rbd_dev); + ret = rbd_dev_setup_parent(rbd_dev); if (ret) goto err_out_probe; } @@ -7044,6 +6981,107 @@ err_out_format: return ret; } +static void rbd_dev_update_header(struct rbd_device *rbd_dev, + struct rbd_image_header *header) +{ + rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + rbd_assert(rbd_dev->header.object_prefix); /* !first_time */ + + if (rbd_dev->header.image_size != header->image_size) { + rbd_dev->header.image_size = header->image_size; + + if (!rbd_is_snap(rbd_dev)) { + rbd_dev->mapping.size = header->image_size; + rbd_dev_update_size(rbd_dev); + } + } + + ceph_put_snap_context(rbd_dev->header.snapc); + rbd_dev->header.snapc = header->snapc; + header->snapc = NULL; + + if (rbd_dev->image_format == 1) { + kfree(rbd_dev->header.snap_names); + rbd_dev->header.snap_names = header->snap_names; + header->snap_names = NULL; + + kfree(rbd_dev->header.snap_sizes); + rbd_dev->header.snap_sizes = header->snap_sizes; + header->snap_sizes = NULL; + } +} + +static void rbd_dev_update_parent(struct rbd_device *rbd_dev, + struct parent_image_info *pii) +{ + if (pii->pool_id == CEPH_NOPOOL || !pii->has_overlap) { + /* + * Either the parent never existed, or we have + * record of it but the image got flattened so it no + * longer has a parent. When the parent of a + * layered image disappears we immediately set the + * overlap to 0. The effect of this is that all new + * requests will be treated as if the image had no + * parent. + * + * If !pii.has_overlap, the parent image spec is not + * applicable. It's there to avoid duplication in each + * snapshot record. + */ + if (rbd_dev->parent_overlap) { + rbd_dev->parent_overlap = 0; + rbd_dev_parent_put(rbd_dev); + pr_info("%s: clone has been flattened\n", + rbd_dev->disk->disk_name); + } + } else { + rbd_assert(rbd_dev->parent_spec); + + /* + * Update the parent overlap. 
If it became zero, issue + * a warning as we will proceed as if there is no parent. + */ + if (!pii->overlap && rbd_dev->parent_overlap) + rbd_warn(rbd_dev, + "clone has become standalone (overlap 0)"); + rbd_dev->parent_overlap = pii->overlap; + } +} + +static int rbd_dev_refresh(struct rbd_device *rbd_dev) +{ + struct rbd_image_header header = { 0 }; + struct parent_image_info pii = { 0 }; + int ret; + + dout("%s rbd_dev %p\n", __func__, rbd_dev); + + ret = rbd_dev_header_info(rbd_dev, &header, false); + if (ret) + goto out; + + /* + * If there is a parent, see if it has disappeared due to the + * mapped image getting flattened. + */ + if (rbd_dev->parent) { + ret = rbd_dev_v2_parent_info(rbd_dev, &pii); + if (ret) + goto out; + } + + down_write(&rbd_dev->header_rwsem); + rbd_dev_update_header(rbd_dev, &header); + if (rbd_dev->parent) + rbd_dev_update_parent(rbd_dev, &pii); + up_write(&rbd_dev->header_rwsem); + +out: + rbd_parent_info_cleanup(&pii); + rbd_image_header_cleanup(&header); + return ret; +} + static ssize_t do_rbd_add(struct bus_type *bus, const char *buf, size_t count) diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 60051c0cabeaa..e322a326546b5 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -81,7 +81,8 @@ MODULE_PARM_DESC(exclusive_login, "Exclusive login to sbp2 device " * * - power condition * Set the power condition field in the START STOP UNIT commands sent by - * sd_mod on suspend, resume, and shutdown (if manage_start_stop is on). + * sd_mod on suspend, resume, and shutdown (if manage_system_start_stop or + * manage_runtime_start_stop is on). * Some disks need this to spin down or to resume properly. * * - override internal blacklist @@ -1517,8 +1518,10 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev) sdev->use_10_for_rw = 1; - if (sbp2_param_exclusive_login) - sdev->manage_start_stop = 1; + if (sbp2_param_exclusive_login) { + sdev->manage_system_start_stop = true; + sdev->manage_runtime_start_stop = true; + } if (sdev->type == TYPE_ROM) sdev->use_10_for_ms = 1; diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c index 318a7d95a1a8b..42d3e1cf73528 100644 --- a/drivers/gpio/gpio-aspeed.c +++ b/drivers/gpio/gpio-aspeed.c @@ -963,7 +963,7 @@ static int aspeed_gpio_set_config(struct gpio_chip *chip, unsigned int offset, else if (param == PIN_CONFIG_BIAS_DISABLE || param == PIN_CONFIG_BIAS_PULL_DOWN || param == PIN_CONFIG_DRIVE_STRENGTH) - return pinctrl_gpio_set_config(offset, config); + return pinctrl_gpio_set_config(chip->base + offset, config); else if (param == PIN_CONFIG_DRIVE_OPEN_DRAIN || param == PIN_CONFIG_DRIVE_OPEN_SOURCE) /* Return -ENOTSUPP to trigger emulation, as per datasheet */ diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index 1198ab0305d03..b90357774dc04 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -243,6 +243,7 @@ static bool pxa_gpio_has_pinctrl(void) switch (gpio_type) { case PXA3XX_GPIO: case MMP2_GPIO: + case MMP_GPIO: return false; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5f5999cea7d2c..92fa2faf63e41 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2179,7 +2179,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->flags |= AMD_IS_PX; if (!(adev->flags & AMD_IS_APU)) { - parent = pci_upstream_bridge(adev->pdev); + parent = pcie_find_root_port(adev->pdev); adev->has_pr3 = 
parent ? pci_pr3_present(parent) : false; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 18274ff5082ad..339f1f5a08339 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2344,14 +2344,62 @@ static int dm_late_init(void *handle) return detect_mst_link_for_all_connectors(adev_to_drm(adev)); } +static void resume_mst_branch_status(struct drm_dp_mst_topology_mgr *mgr) +{ + int ret; + u8 guid[16]; + u64 tmp64; + + mutex_lock(&mgr->lock); + if (!mgr->mst_primary) + goto out_fail; + + if (drm_dp_read_dpcd_caps(mgr->aux, mgr->dpcd) < 0) { + drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n"); + goto out_fail; + } + + ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, + DP_MST_EN | + DP_UP_REQ_EN | + DP_UPSTREAM_IS_SRC); + if (ret < 0) { + drm_dbg_kms(mgr->dev, "mst write failed - undocked during suspend?\n"); + goto out_fail; + } + + /* Some hubs forget their guids after they resume */ + ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16); + if (ret != 16) { + drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n"); + goto out_fail; + } + + if (memchr_inv(guid, 0, 16) == NULL) { + tmp64 = get_jiffies_64(); + memcpy(&guid[0], &tmp64, sizeof(u64)); + memcpy(&guid[8], &tmp64, sizeof(u64)); + + ret = drm_dp_dpcd_write(mgr->aux, DP_GUID, guid, 16); + + if (ret != 16) { + drm_dbg_kms(mgr->dev, "check mstb guid failed - undocked during suspend?\n"); + goto out_fail; + } + } + + memcpy(mgr->mst_primary->guid, guid, 16); + +out_fail: + mutex_unlock(&mgr->lock); +} + static void s3_handle_mst(struct drm_device *dev, bool suspend) { struct amdgpu_dm_connector *aconnector; struct drm_connector *connector; struct drm_connector_list_iter iter; struct drm_dp_mst_topology_mgr *mgr; - int ret; - bool need_hotplug = false; drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { @@ -2373,18 +2421,15 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend) if (!dp_is_lttpr_present(aconnector->dc_link)) dc_link_aux_try_to_configure_timeout(aconnector->dc_link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD); - ret = drm_dp_mst_topology_mgr_resume(mgr, true); - if (ret < 0) { - dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx, - aconnector->dc_link); - need_hotplug = true; - } + /* TODO: move resume_mst_branch_status() into drm mst resume again + * once topology probing work is pulled out from mst resume into mst + * resume 2nd step. mst resume 2nd step should be called after old + * state getting restored (i.e. drm_atomic_helper_resume()). 
+ */ + resume_mst_branch_status(mgr); } } drm_connector_list_iter_end(&iter); - - if (need_hotplug) - drm_kms_helper_hotplug_event(dev); } static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev) @@ -2773,7 +2818,8 @@ static int dm_resume(void *handle) struct dm_atomic_state *dm_state = to_dm_atomic_state(dm->atomic_obj.state); enum dc_connection_type new_connection_type = dc_connection_none; struct dc_state *dc_state; - int i, r, j; + int i, r, j, ret; + bool need_hotplug = false; if (amdgpu_in_reset(adev)) { dc_state = dm->cached_dc_state; @@ -2871,7 +2917,7 @@ static int dm_resume(void *handle) continue; /* - * this is the case when traversing through already created + * this is the case when traversing through already created end sink * MST connectors, should be skipped */ if (aconnector && aconnector->mst_port) @@ -2931,6 +2977,27 @@ static int dm_resume(void *handle) dm->cached_state = NULL; + /* Do mst topology probing after resuming cached state*/ + drm_connector_list_iter_begin(ddev, &iter); + drm_for_each_connector_iter(connector, &iter) { + aconnector = to_amdgpu_dm_connector(connector); + if (aconnector->dc_link->type != dc_connection_mst_branch || + aconnector->mst_port) + continue; + + ret = drm_dp_mst_topology_mgr_resume(&aconnector->mst_mgr, true); + + if (ret < 0) { + dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx, + aconnector->dc_link); + need_hotplug = true; + } + } + drm_connector_list_iter_end(&iter); + + if (need_hotplug) + drm_kms_helper_hotplug_event(ddev); + amdgpu_dm_irq_resume_late(adev); amdgpu_dm_smu_write_watermarks_table(adev); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 839a812e0da32..fbc4d706748b7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2081,36 +2081,41 @@ static int sienna_cichlid_display_disable_memory_clock_switch(struct smu_context return ret; } +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu, uint32_t pcie_gen_cap, uint32_t pcie_width_cap) { struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; struct smu_11_0_pcie_table *pcie_table = &dpm_context->dpm_tables.pcie_table; - u32 smu_pcie_arg; + uint8_t *table_member1, *table_member2; + uint32_t min_gen_speed, max_gen_speed; + uint32_t min_lane_width, max_lane_width; + uint32_t smu_pcie_arg; int ret, i; - /* PCIE gen speed and lane width override */ - if (!amdgpu_device_pcie_dynamic_switching_supported()) { - if (pcie_table->pcie_gen[NUM_LINK_LEVELS - 1] < pcie_gen_cap) - pcie_gen_cap = pcie_table->pcie_gen[NUM_LINK_LEVELS - 1]; + GET_PPTABLE_MEMBER(PcieGenSpeed, &table_member1); + GET_PPTABLE_MEMBER(PcieLaneCount, &table_member2); - if (pcie_table->pcie_lane[NUM_LINK_LEVELS - 1] < pcie_width_cap) - pcie_width_cap = pcie_table->pcie_lane[NUM_LINK_LEVELS - 1]; + min_gen_speed = MAX(0, table_member1[0]); + max_gen_speed = MIN(pcie_gen_cap, table_member1[1]); + min_gen_speed = min_gen_speed > max_gen_speed ? + max_gen_speed : min_gen_speed; + min_lane_width = MAX(1, table_member2[0]); + max_lane_width = MIN(pcie_width_cap, table_member2[1]); + min_lane_width = min_lane_width > max_lane_width ? 
+ max_lane_width : min_lane_width; - /* Force all levels to use the same settings */ - for (i = 0; i < NUM_LINK_LEVELS; i++) { - pcie_table->pcie_gen[i] = pcie_gen_cap; - pcie_table->pcie_lane[i] = pcie_width_cap; - } + if (!amdgpu_device_pcie_dynamic_switching_supported()) { + pcie_table->pcie_gen[0] = max_gen_speed; + pcie_table->pcie_lane[0] = max_lane_width; } else { - for (i = 0; i < NUM_LINK_LEVELS; i++) { - if (pcie_table->pcie_gen[i] > pcie_gen_cap) - pcie_table->pcie_gen[i] = pcie_gen_cap; - if (pcie_table->pcie_lane[i] > pcie_width_cap) - pcie_table->pcie_lane[i] = pcie_width_cap; - } + pcie_table->pcie_gen[0] = min_gen_speed; + pcie_table->pcie_lane[0] = min_lane_width; } + pcie_table->pcie_gen[1] = max_gen_speed; + pcie_table->pcie_lane[1] = max_lane_width; for (i = 0; i < NUM_LINK_LEVELS; i++) { smu_pcie_arg = (i << 16 | diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 03691cdcfb8e1..f7f7252d839ee 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -3074,6 +3074,8 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) return ret; err: + usb_free_urb(sc->ghl_urb); + hid_hw_stop(hdev); return ret; } diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index 55cb25038e632..710fda5f19e1c 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -133,6 +133,14 @@ static int enable_gpe(struct device *dev) } wakeup = &adev->wakeup; + /* + * Call acpi_disable_gpe(), so that reference count + * gpe_event_info->runtime_count doesn't overflow. + * When gpe_event_info->runtime_count = 0, the call + * to acpi_disable_gpe() simply return. + */ + acpi_disable_gpe(wakeup->gpe_device, wakeup->gpe_number); + acpi_sts = acpi_enable_gpe(wakeup->gpe_device, wakeup->gpe_number); if (ACPI_FAILURE(acpi_sts)) { dev_err(dev, "enable ose_gpe failed\n"); diff --git a/drivers/hwmon/nzxt-smart2.c b/drivers/hwmon/nzxt-smart2.c index 533f38b0b4e9b..a8e72d8fd0605 100644 --- a/drivers/hwmon/nzxt-smart2.c +++ b/drivers/hwmon/nzxt-smart2.c @@ -791,6 +791,8 @@ static const struct hid_device_id nzxt_smart2_hid_id_table[] = { { HID_USB_DEVICE(0x1e71, 0x2009) }, /* NZXT RGB & Fan Controller */ { HID_USB_DEVICE(0x1e71, 0x200e) }, /* NZXT RGB & Fan Controller */ { HID_USB_DEVICE(0x1e71, 0x2010) }, /* NZXT RGB & Fan Controller */ + { HID_USB_DEVICE(0x1e71, 0x2011) }, /* NZXT RGB & Fan Controller (6 RGB) */ + { HID_USB_DEVICE(0x1e71, 0x2019) }, /* NZXT RGB & Fan Controller (6 RGB) */ {}, }; diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index cfeb24d40d378..bb3d10099ba44 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1430,6 +1430,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &idle_cpu_adl_n), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &idle_cpu_spr), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), @@ -1862,6 +1863,7 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) skx_idle_state_table_update(); break; case INTEL_FAM6_SAPPHIRERAPIDS_X: + case INTEL_FAM6_EMERALDRAPIDS_X: spr_idle_state_table_update(); break; case INTEL_FAM6_ALDERLAKE: diff --git a/drivers/infiniband/core/cma.c 
b/drivers/infiniband/core/cma.c index 4632b1833381a..0773ca7ace247 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4936,7 +4936,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, int err = 0; struct sockaddr *addr = (struct sockaddr *)&mc->addr; struct net_device *ndev = NULL; - struct ib_sa_multicast ib; + struct ib_sa_multicast ib = {}; enum ib_gid_type gid_type; bool send_only; diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 7b68b3ea979f7..f2fb2d8a65970 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -217,7 +217,7 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group, return -ENOMEM; for (i = 0; i < ports_num; i++) { - char port_str[10]; + char port_str[11]; ports[i].port_num = i + 1; snprintf(port_str, sizeof(port_str), "%u", i + 1); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 222733a83ddb7..1adf20198afd1 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -2501,6 +2501,7 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { }, [RDMA_NLDEV_CMD_SYS_SET] = { .doit = nldev_set_sys_set_doit, + .flags = RDMA_NL_ADMIN_PERM, }, [RDMA_NLDEV_CMD_STAT_SET] = { .doit = nldev_stat_set_doit, diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index fa937cd268219..6fe825800494c 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -535,7 +535,7 @@ static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr, if (hdr->in_words * 4 != count) return -EINVAL; - if (count < method_elm->req_size + sizeof(hdr)) { + if (count < method_elm->req_size + sizeof(*hdr)) { /* * rdma-core v18 and v19 have a bug where they send DESTROY_CQ * with a 16 byte write instead of 24. 
Old kernels didn't diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 24ee79aa2122e..88f534cf690e9 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -223,7 +223,7 @@ void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, static int add_port_entries(struct mlx4_ib_dev *device, int port_num) { int i; - char buff[11]; + char buff[12]; struct mlx4_ib_iov_port *port = NULL; int ret = 0 ; struct ib_port_attr attr; diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 5a13d902b0641..1022cebd0a46e 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -2471,8 +2471,8 @@ destroy_res: mlx5_steering_anchor_destroy_res(ft_prio); put_flow_table: put_flow_table(dev, ft_prio, true); - mutex_unlock(&dev->flow_db->lock); free_obj: + mutex_unlock(&dev->flow_db->lock); kfree(obj); return err; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 3178df55c4d85..0baf3b5518b46 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2074,7 +2074,7 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) case MLX5_IB_MMAP_DEVICE_MEM: return "Device Memory"; default: - return NULL; + return "Unknown"; } } diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 552d8271e423b..dc679c34ceefa 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -973,6 +973,7 @@ static void siw_accept_newconn(struct siw_cep *cep) siw_cep_put(cep); new_cep->listen_cep = NULL; if (rv) { + siw_cancel_mpatimer(new_cep); siw_cep_set_free(new_cep); goto error; } @@ -1097,9 +1098,12 @@ static void siw_cm_work_handler(struct work_struct *w) /* * Socket close before MPA request received. 
*/ - siw_dbg_cep(cep, "no mpareq: drop listener\n"); - siw_cep_put(cep->listen_cep); - cep->listen_cep = NULL; + if (cep->listen_cep) { + siw_dbg_cep(cep, + "no mpareq: drop listener\n"); + siw_cep_put(cep->listen_cep); + cep->listen_cep = NULL; + } } } release_cep = 1; @@ -1222,7 +1226,11 @@ static void siw_cm_llp_data_ready(struct sock *sk) if (!cep) goto out; - siw_dbg_cep(cep, "state: %d\n", cep->state); + siw_dbg_cep(cep, "cep state: %d, socket state %d\n", + cep->state, sk->sk_state); + + if (sk->sk_state != TCP_ESTABLISHED) + goto out; switch (cep->state) { case SIW_EPSTATE_RDMA_MODE: diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index a7580c4855fec..c4dcef76e9646 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2789,7 +2789,6 @@ static int srp_abort(struct scsi_cmnd *scmnd) u32 tag; u16 ch_idx; struct srp_rdma_ch *ch; - int ret; shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); @@ -2803,19 +2802,14 @@ static int srp_abort(struct scsi_cmnd *scmnd) shost_printk(KERN_ERR, target->scsi_host, "Sending SRP abort for tag %#x\n", tag); if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun, - SRP_TSK_ABORT_TASK, NULL) == 0) - ret = SUCCESS; - else if (target->rport->state == SRP_RPORT_LOST) - ret = FAST_IO_FAIL; - else - ret = FAILED; - if (ret == SUCCESS) { + SRP_TSK_ABORT_TASK, NULL) == 0) { srp_free_req(ch, req, scmnd, 0); - scmnd->result = DID_ABORT << 16; - scsi_done(scmnd); + return SUCCESS; } + if (target->rport->state == SRP_RPORT_LOST) + return FAST_IO_FAIL; - return ret; + return FAILED; } static int srp_reset_device(struct scsi_cmnd *scmnd) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index db33dc87f69ed..8966f7d5aab61 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1886,13 +1886,23 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, /* Get the leaf page size */ tg = __ffs(smmu_domain->domain.pgsize_bitmap); + num_pages = size >> tg; + /* Convert page size of 12,14,16 (log2) to 1,2,3 */ cmd->tlbi.tg = (tg - 10) / 2; - /* Determine what level the granule is at */ - cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3)); - - num_pages = size >> tg; + /* + * Determine what level the granule is at. For non-leaf, both + * io-pgtable and SVA pass a nominal last-level granule because + * they don't know what level(s) actually apply, so ignore that + * and leave TTL=0. However for various errata reasons we still + * want to use a range command, so avoid the SVA corner case + * where both scale and num could be 0 as well. 
+ */ + if (cmd->tlbi.leaf) + cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3)); + else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1) + num_pages++; } cmds.num = 0; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d4b5d20bd6dda..5c4f5aa8e87e4 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3163,13 +3163,6 @@ static int iommu_suspend(void) struct intel_iommu *iommu = NULL; unsigned long flag; - for_each_active_iommu(iommu, drhd) { - iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32), - GFP_KERNEL); - if (!iommu->iommu_state) - goto nomem; - } - iommu_flush_all(); for_each_active_iommu(iommu, drhd) { @@ -3189,12 +3182,6 @@ static int iommu_suspend(void) raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } return 0; - -nomem: - for_each_active_iommu(iommu, drhd) - kfree(iommu->iommu_state); - - return -ENOMEM; } static void iommu_resume(void) @@ -3226,9 +3213,6 @@ static void iommu_resume(void) raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } - - for_each_active_iommu(iommu, drhd) - kfree(iommu->iommu_state); } static struct syscore_ops iommu_syscore_ops = { diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index db9df7c3790cd..c99cb715bd9a2 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -595,7 +595,7 @@ struct intel_iommu { struct iopf_queue *iopf_queue; unsigned char iopfq_name[16]; struct q_inval *qi; /* Queued invalidation info */ - u32 *iommu_state; /* Store iommu states between suspend and resume.*/ + u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/ #ifdef CONFIG_IRQ_REMAP struct ir_table *ir_table; /* Interrupt remapping info */ diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 9673cd60c84fc..0ba2a63a9538a 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -223,7 +223,7 @@ struct mtk_iommu_data { struct device *smicomm_dev; struct mtk_iommu_bank_data *bank; - struct mtk_iommu_domain *share_dom; /* For 2 HWs share pgtable */ + struct mtk_iommu_domain *share_dom; struct regmap *pericfg; struct mutex mutex; /* Protect m4u_group/m4u_dom above */ @@ -579,8 +579,8 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, struct mtk_iommu_domain *share_dom = data->share_dom; const struct mtk_iommu_iova_region *region; - /* Always use share domain in sharing pgtable case */ - if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE) && share_dom) { + /* Share pgtable when 2 MM IOMMU share the pgtable or one IOMMU use multiple iova ranges */ + if (share_dom) { dom->iop = share_dom->iop; dom->cfg = share_dom->cfg; dom->domain.pgsize_bitmap = share_dom->cfg.pgsize_bitmap; @@ -613,8 +613,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, /* Update our support page sizes bitmap */ dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap; - if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) - data->share_dom = dom; + data->share_dom = dom; update_iova_region: /* Update the iova region for this domain */ diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index aad8bc44459fe..d94d60b526461 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -424,10 +424,6 @@ int led_compose_name(struct device *dev, struct led_init_data *init_data, led_parse_fwnode_props(dev, fwnode, &props); - /* We want to label LEDs that can produce full range of colors - * as RGB, not multicolor */ - BUG_ON(props.color == LED_COLOR_ID_MULTI); - if 
(props.label) { /* * If init_data.devicename is NULL, then it indicates that diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 95b132b52f332..4abe1e2f8ad81 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -748,17 +748,16 @@ err: /* * Cleanup zoned device information. */ -static void dmz_put_zoned_device(struct dm_target *ti) +static void dmz_put_zoned_devices(struct dm_target *ti) { struct dmz_target *dmz = ti->private; int i; - for (i = 0; i < dmz->nr_ddevs; i++) { - if (dmz->ddev[i]) { + for (i = 0; i < dmz->nr_ddevs; i++) + if (dmz->ddev[i]) dm_put_device(ti, dmz->ddev[i]); - dmz->ddev[i] = NULL; - } - } + + kfree(dmz->ddev); } static int dmz_fixup_devices(struct dm_target *ti) @@ -948,7 +947,7 @@ err_bio: err_meta: dmz_dtr_metadata(dmz->metadata); err_dev: - dmz_put_zoned_device(ti); + dmz_put_zoned_devices(ti); err: kfree(dmz->dev); kfree(dmz); @@ -978,7 +977,7 @@ static void dmz_dtr(struct dm_target *ti) bioset_exit(&dmz->bio_set); - dmz_put_zoned_device(ti); + dmz_put_zoned_devices(ti); mutex_destroy(&dmz->chunk_lock); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index fbef3c9badb65..98d4e93efa31c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -854,6 +854,13 @@ struct stripe_head *raid5_get_active_stripe(struct r5conf *conf, set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); r5l_wake_reclaim(conf->log, 0); + + /* release batch_last before wait to avoid risk of deadlock */ + if (ctx && ctx->batch_last) { + raid5_release_stripe(ctx->batch_last); + ctx->batch_last = NULL; + } + wait_event_lock_irq(conf->wait_for_stripe, is_inactive_blocked(conf, hash), *(conf->hash_locks + hash)); diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 1662c12e24ada..6fbd77dc1d18f 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -893,6 +893,13 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, return -EINVAL; } + /* UBI cannot work on flashes with zero erasesize. */ + if (!mtd->erasesize) { + pr_err("ubi: refuse attaching mtd%d - zero erasesize flash is not supported\n", + mtd->index); + return -EINVAL; + } + if (ubi_num == UBI_DEV_NUM_AUTO) { /* Search for an empty slot in the @ubi_devices array */ for (ubi_num = 0; ubi_num < UBI_MAX_DEVICES; ubi_num++) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index a73008b9e0b3c..ba906dfab055c 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3012,14 +3012,16 @@ static void mv88e6xxx_hardware_reset(struct mv88e6xxx_chip *chip) * from the wrong location resulting in the switch booting * to wrong mode and inoperable. 
*/ - mv88e6xxx_g1_wait_eeprom_done(chip); + if (chip->info->ops->get_eeprom) + mv88e6xxx_g2_eeprom_wait(chip); gpiod_set_value_cansleep(gpiod, 1); usleep_range(10000, 20000); gpiod_set_value_cansleep(gpiod, 0); usleep_range(10000, 20000); - mv88e6xxx_g1_wait_eeprom_done(chip); + if (chip->info->ops->get_eeprom) + mv88e6xxx_g2_eeprom_wait(chip); } } diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c index 5848112036b08..964928285782c 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.c +++ b/drivers/net/dsa/mv88e6xxx/global1.c @@ -75,37 +75,6 @@ static int mv88e6xxx_g1_wait_init_ready(struct mv88e6xxx_chip *chip) return mv88e6xxx_g1_wait_bit(chip, MV88E6XXX_G1_STS, bit, 1); } -void mv88e6xxx_g1_wait_eeprom_done(struct mv88e6xxx_chip *chip) -{ - const unsigned long timeout = jiffies + 1 * HZ; - u16 val; - int err; - - /* Wait up to 1 second for the switch to finish reading the - * EEPROM. - */ - while (time_before(jiffies, timeout)) { - err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STS, &val); - if (err) { - dev_err(chip->dev, "Error reading status"); - return; - } - - /* If the switch is still resetting, it may not - * respond on the bus, and so MDIO read returns - * 0xffff. Differentiate between that, and waiting for - * the EEPROM to be done by bit 0 being set. - */ - if (val != 0xffff && - val & BIT(MV88E6XXX_G1_STS_IRQ_EEPROM_DONE)) - return; - - usleep_range(1000, 2000); - } - - dev_err(chip->dev, "Timeout waiting for EEPROM done"); -} - /* Offset 0x01: Switch MAC Address Register Bytes 0 & 1 * Offset 0x02: Switch MAC Address Register Bytes 2 & 3 * Offset 0x03: Switch MAC Address Register Bytes 4 & 5 diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h index 65958b2a0d3a3..04b57a21f7868 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.h +++ b/drivers/net/dsa/mv88e6xxx/global1.h @@ -281,7 +281,6 @@ int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr); int mv88e6185_g1_reset(struct mv88e6xxx_chip *chip); int mv88e6352_g1_reset(struct mv88e6xxx_chip *chip); int mv88e6250_g1_reset(struct mv88e6xxx_chip *chip); -void mv88e6xxx_g1_wait_eeprom_done(struct mv88e6xxx_chip *chip); int mv88e6185_g1_ppu_enable(struct mv88e6xxx_chip *chip); int mv88e6185_g1_ppu_disable(struct mv88e6xxx_chip *chip); diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index ec49939968fac..ac302a935ce69 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -340,7 +340,7 @@ int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip) * Offset 0x15: EEPROM Addr (for 8-bit data access) */ -static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip) +int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip) { int bit = __bf_shf(MV88E6XXX_G2_EEPROM_CMD_BUSY); int err; diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h index c05fad5c9f19d..751a6c988de42 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.h +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -359,6 +359,7 @@ int mv88e6xxx_g2_trunk_clear(struct mv88e6xxx_chip *chip); int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target, int port); +int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip); extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops; extern const struct mv88e6xxx_irq_ops mv88e6250_watchdog_ops; diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 0b4ec6e41eb41..1d21a281222d9 100644 --- 
a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1308,24 +1308,23 @@ static void ibmveth_rx_csum_helper(struct sk_buff *skb, * the user space for finding a flow. During this process, OVS computes * checksum on the first packet when CHECKSUM_PARTIAL flag is set. * - * So, re-compute TCP pseudo header checksum when configured for - * trunk mode. + * So, re-compute TCP pseudo header checksum. */ + if (iph_proto == IPPROTO_TCP) { struct tcphdr *tcph = (struct tcphdr *)(skb->data + iphlen); + if (tcph->check == 0x0000) { /* Recompute TCP pseudo header checksum */ - if (adapter->is_active_trunk) { - tcphdrlen = skb->len - iphlen; - if (skb_proto == ETH_P_IP) - tcph->check = - ~csum_tcpudp_magic(iph->saddr, - iph->daddr, tcphdrlen, iph_proto, 0); - else if (skb_proto == ETH_P_IPV6) - tcph->check = - ~csum_ipv6_magic(&iph6->saddr, - &iph6->daddr, tcphdrlen, iph_proto, 0); - } + tcphdrlen = skb->len - iphlen; + if (skb_proto == ETH_P_IP) + tcph->check = + ~csum_tcpudp_magic(iph->saddr, + iph->daddr, tcphdrlen, iph_proto, 0); + else if (skb_proto == ETH_P_IPV6) + tcph->check = + ~csum_ipv6_magic(&iph6->saddr, + &iph6->daddr, tcphdrlen, iph_proto, 0); /* Setup SKB fields for checksum offload */ skb_partial_csum_set(skb, iphlen, offsetof(struct tcphdr, check)); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index ffea0c9c82f1e..97a9efe7b713e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -361,9 +361,9 @@ static int i40e_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) 1000000ULL << 16); if (neg_adj) - adj = I40E_PTP_40GB_INCVAL - diff; + adj = freq - diff; else - adj = I40E_PTP_40GB_INCVAL + diff; + adj = freq + diff; wr32(hw, I40E_PRTTSYN_INC_L, adj & 0xFFFFFFFF); wr32(hw, I40E_PRTTSYN_INC_H, adj >> 32); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 0ac5ae16308f6..17e6ac4445afc 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2862,8 +2862,8 @@ static irqreturn_t mtk_handle_irq_rx(int irq, void *_eth) eth->rx_events++; if (likely(napi_schedule_prep(ð->rx_napi))) { - __napi_schedule(ð->rx_napi); mtk_rx_irq_disable(eth, eth->soc->txrx.rx_irq_done_mask); + __napi_schedule(ð->rx_napi); } return IRQ_HANDLED; @@ -2875,8 +2875,8 @@ static irqreturn_t mtk_handle_irq_tx(int irq, void *_eth) eth->tx_events++; if (likely(napi_schedule_prep(ð->tx_napi))) { - __napi_schedule(ð->tx_napi); mtk_tx_irq_disable(eth, MTK_TX_DONE_INT); + __napi_schedule(ð->tx_napi); } return IRQ_HANDLED; diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h index 0bfc375161ed6..a174c6fc626ac 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h @@ -110,9 +110,9 @@ struct qed_ll2_info { enum core_tx_dest tx_dest; u8 tx_stats_en; bool main_func_queue; + struct qed_ll2_cbs cbs; struct qed_ll2_rx_queue rx_queue; struct qed_ll2_tx_queue tx_queue; - struct qed_ll2_cbs cbs; }; extern const struct qed_ll2_ops qed_ll2_ops_pass; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index 2b38a499a4045..533f5245ad945 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -105,6 +105,7 @@ struct stm32_ops { int (*parse_data)(struct stm32_dwmac *dwmac, 
struct device *dev); u32 syscfg_eth_mask; + bool clk_rx_enable_in_suspend; }; static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat) @@ -122,7 +123,8 @@ static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat) if (ret) return ret; - if (!dwmac->dev->power.is_suspended) { + if (!dwmac->ops->clk_rx_enable_in_suspend || + !dwmac->dev->power.is_suspended) { ret = clk_prepare_enable(dwmac->clk_rx); if (ret) { clk_disable_unprepare(dwmac->clk_tx); @@ -515,7 +517,8 @@ static struct stm32_ops stm32mp1_dwmac_data = { .suspend = stm32mp1_suspend, .resume = stm32mp1_resume, .parse_data = stm32mp1_parse_data, - .syscfg_eth_mask = SYSCFG_MP1_ETH_MASK + .syscfg_eth_mask = SYSCFG_MP1_ETH_MASK, + .clk_rx_enable_in_suspend = true }; static const struct of_device_id stm32_dwmac_match[] = { diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 25466cbdc16bd..9f2553799895d 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1614,6 +1614,7 @@ static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common) if (tx_chn->irq <= 0) { dev_err(dev, "Failed to get tx dma irq %d\n", tx_chn->irq); + ret = tx_chn->irq ?: -ENXIO; goto err; } diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 5d6454fedb3f1..78ad2da3ee29b 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -90,7 +90,9 @@ static int __must_check __smsc75xx_read_reg(struct usbnet *dev, u32 index, ret = fn(dev, USB_VENDOR_REQUEST_READ_REGISTER, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, index, &buf, 4); - if (unlikely(ret < 0)) { + if (unlikely(ret < 4)) { + ret = ret < 0 ? ret : -ENODATA; + netdev_warn(dev->net, "Failed to read reg index 0x%08x: %d\n", index, ret); return ret; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index f6dcec66f0a4b..208df4d419395 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -664,7 +664,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; - rcu_read_lock_bh(); + rcu_read_lock(); nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); if (unlikely(!neigh)) @@ -672,10 +672,10 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, if (!IS_ERR(neigh)) { sock_confirm_neigh(skb, neigh); ret = neigh_output(neigh, skb, false); - rcu_read_unlock_bh(); + rcu_read_unlock(); return ret; } - rcu_read_unlock_bh(); + rcu_read_unlock(); IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); @@ -889,7 +889,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s } } - rcu_read_lock_bh(); + rcu_read_lock(); neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); if (!IS_ERR(neigh)) { @@ -898,11 +898,11 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s sock_confirm_neigh(skb, neigh); /* if crossing protocols, can not use the cached header */ ret = neigh_output(neigh, skb, is_v6gw); - rcu_read_unlock_bh(); + rcu_read_unlock(); return ret; } - rcu_read_unlock_bh(); + rcu_read_unlock(); vrf_tx_error(skb->dev, skb); return -EINVAL; } diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 0c3eb850fcb79..619dd71c9d75e 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1910,7 +1910,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) struct 
vxlan_fdb *f; struct sk_buff *reply; - if (!(n->nud_state & NUD_CONNECTED)) { + if (!(READ_ONCE(n->nud_state) & NUD_CONNECTED)) { neigh_release(n); goto out; } @@ -2074,7 +2074,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) struct vxlan_fdb *f; struct sk_buff *reply; - if (!(n->nud_state & NUD_CONNECTED)) { + if (!(READ_ONCE(n->nud_state) & NUD_CONNECTED)) { neigh_release(n); goto out; } diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 1c53b55469270..5fec8abe8e1d3 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -34,6 +34,8 @@ #define TDM_PPPOHT_SLIC_MAXIN #define RX_BD_ERRORS (R_CD_S | R_OV_S | R_CR_S | R_AB_S | R_NO_S | R_LG_S) +static int uhdlc_close(struct net_device *dev); + static struct ucc_tdm_info utdm_primary_info = { .uf_info = { .tsa = 0, @@ -708,6 +710,7 @@ static int uhdlc_open(struct net_device *dev) hdlc_device *hdlc = dev_to_hdlc(dev); struct ucc_hdlc_private *priv = hdlc->priv; struct ucc_tdm *utdm = priv->utdm; + int rc = 0; if (priv->hdlc_busy != 1) { if (request_irq(priv->ut_info->uf_info.irq, @@ -731,10 +734,13 @@ static int uhdlc_open(struct net_device *dev) napi_enable(&priv->napi); netdev_reset_queue(dev); netif_start_queue(dev); - hdlc_open(dev); + + rc = hdlc_open(dev); + if (rc) + uhdlc_close(dev); } - return 0; + return rc; } static void uhdlc_memclean(struct ucc_hdlc_private *priv) @@ -824,6 +830,8 @@ static int uhdlc_close(struct net_device *dev) netdev_reset_queue(dev); priv->hdlc_busy = 0; + hdlc_close(dev); + return 0; } diff --git a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h index c62576e442bdf..2d481849a9c23 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h @@ -295,9 +295,9 @@ struct iwl_fw_ini_fifo_hdr { struct iwl_fw_ini_error_dump_range { __le32 range_data_size; union { - __le32 internal_base_addr; - __le64 dram_base_addr; - __le32 page_num; + __le32 internal_base_addr __packed; + __le64 dram_base_addr __packed; + __le32 page_num __packed; struct iwl_fw_ini_fifo_hdr fifo_hdr; struct iwl_cmd_header fw_pkt_hdr; }; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 887d0789c96c3..2e3c98eaa400c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -796,7 +796,7 @@ out: mvm->nvm_data->bands[0].n_channels = 1; mvm->nvm_data->bands[0].n_bitrates = 1; mvm->nvm_data->bands[0].bitrates = - (void *)((u8 *)mvm->nvm_data->channels + 1); + (void *)(mvm->nvm_data->channels + 1); mvm->nvm_data->bands[0].bitrates->hw_value = 10; } diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index a04b66284af4a..7351acac6932d 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -965,8 +965,8 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv, } } - tlv_buf_left -= (sizeof(*tlv_rxba) + tlv_len); - tmp = (u8 *)tlv_rxba + tlv_len + sizeof(*tlv_rxba); + tlv_buf_left -= (sizeof(tlv_rxba->header) + tlv_len); + tmp = (u8 *)tlv_rxba + sizeof(tlv_rxba->header) + tlv_len; tlv_rxba = (struct mwifiex_ie_types_rxba_sync *)tmp; } } diff --git a/drivers/net/wireless/marvell/mwifiex/sta_rx.c b/drivers/net/wireless/marvell/mwifiex/sta_rx.c index 65420ad674167..257737137cd70 100644 --- 
a/drivers/net/wireless/marvell/mwifiex/sta_rx.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_rx.c @@ -86,7 +86,8 @@ int mwifiex_process_rx_packet(struct mwifiex_private *priv, rx_pkt_len = le16_to_cpu(local_rx_pd->rx_pkt_length); rx_pkt_hdr = (void *)local_rx_pd + rx_pkt_off; - if (sizeof(*rx_pkt_hdr) + rx_pkt_off > skb->len) { + if (sizeof(rx_pkt_hdr->eth803_hdr) + sizeof(rfc1042_header) + + rx_pkt_off > skb->len) { mwifiex_dbg(priv->adapter, ERROR, "wrong rx packet offset: len=%d, rx_pkt_off=%d\n", skb->len, rx_pkt_off); @@ -95,12 +96,13 @@ int mwifiex_process_rx_packet(struct mwifiex_private *priv, return -1; } - if ((!memcmp(&rx_pkt_hdr->rfc1042_hdr, bridge_tunnel_header, - sizeof(bridge_tunnel_header))) || - (!memcmp(&rx_pkt_hdr->rfc1042_hdr, rfc1042_header, - sizeof(rfc1042_header)) && - ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_AARP && - ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_IPX)) { + if (sizeof(*rx_pkt_hdr) + rx_pkt_off <= skb->len && + ((!memcmp(&rx_pkt_hdr->rfc1042_hdr, bridge_tunnel_header, + sizeof(bridge_tunnel_header))) || + (!memcmp(&rx_pkt_hdr->rfc1042_hdr, rfc1042_header, + sizeof(rfc1042_header)) && + ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_AARP && + ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_IPX))) { /* * Replace the 803 header and rfc1042 header (llc/snap) with an * EthernetII header, keep the src/dst and snap_type diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c index 0acabba2d1a50..5d402cf2951cb 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c @@ -131,15 +131,8 @@ u8 mt76x02_get_lna_gain(struct mt76x02_dev *dev, s8 *lna_2g, s8 *lna_5g, struct ieee80211_channel *chan) { - u16 val; u8 lna; - val = mt76x02_eeprom_get(dev, MT_EE_NIC_CONF_1); - if (val & MT_EE_NIC_CONF_1_LNA_EXT_2G) - *lna_2g = 0; - if (val & MT_EE_NIC_CONF_1_LNA_EXT_5G) - memset(lna_5g, 0, sizeof(s8) * 3); - if (chan->band == NL80211_BAND_2GHZ) lna = *lna_2g; else if (chan->hw_value <= 64) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c index c57e05a5c65e4..91807bf662dde 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c @@ -256,7 +256,8 @@ void mt76x2_read_rx_gain(struct mt76x02_dev *dev) struct ieee80211_channel *chan = dev->mphy.chandef.chan; int channel = chan->hw_value; s8 lna_5g[3], lna_2g; - u8 lna; + bool use_lna; + u8 lna = 0; u16 val; if (chan->band == NL80211_BAND_2GHZ) @@ -275,7 +276,15 @@ void mt76x2_read_rx_gain(struct mt76x02_dev *dev) dev->cal.rx.mcu_gain |= (lna_5g[1] & 0xff) << 16; dev->cal.rx.mcu_gain |= (lna_5g[2] & 0xff) << 24; - lna = mt76x02_get_lna_gain(dev, &lna_2g, lna_5g, chan); + val = mt76x02_eeprom_get(dev, MT_EE_NIC_CONF_1); + if (chan->band == NL80211_BAND_2GHZ) + use_lna = !(val & MT_EE_NIC_CONF_1_LNA_EXT_2G); + else + use_lna = !(val & MT_EE_NIC_CONF_1_LNA_EXT_5G); + + if (use_lna) + lna = mt76x02_get_lna_gain(dev, &lna_2g, lna_5g, chan); + dev->cal.rx.lna_gain = mt76x02_sign_extend(lna, 8); } EXPORT_SYMBOL_GPL(mt76x2_read_rx_gain); diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 95501b77ef314..0fbf331a748fd 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -902,13 +902,13 @@ int of_changeset_action(struct of_changeset *ocs, unsigned long action, { struct of_changeset_entry *ce; + if (WARN_ON(action >= 
ARRAY_SIZE(action_names))) + return -EINVAL; + ce = kzalloc(sizeof(*ce), GFP_KERNEL); if (!ce) return -ENOMEM; - if (WARN_ON(action >= ARRAY_SIZE(action_names))) - return -EINVAL; - /* get a reference to the node */ ce->action = action; ce->np = of_node_get(np); diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index d24712a76ba7c..0ccd92faf078a 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -40,7 +40,6 @@ #define PARF_PHY_REFCLK 0x4c #define PARF_CONFIG_BITS 0x50 #define PARF_DBI_BASE_ADDR 0x168 -#define PARF_SLV_ADDR_SPACE_SIZE_2_3_3 0x16c /* Register offset specific to IP ver 2.3.3 */ #define PARF_MHI_CLOCK_RESET_CTRL 0x174 #define PARF_AXI_MSTR_WR_ADDR_HALT 0x178 #define PARF_AXI_MSTR_WR_ADDR_HALT_V2 0x1a8 @@ -1148,8 +1147,7 @@ static int qcom_pcie_post_init_2_3_3(struct qcom_pcie *pcie) u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); u32 val; - writel(SLV_ADDR_SPACE_SZ, - pcie->parf + PARF_SLV_ADDR_SPACE_SIZE_2_3_3); + writel(SLV_ADDR_SPACE_SZ, pcie->parf + PARF_SLV_ADDR_SPACE_SIZE); val = readl(pcie->parf + PARF_PHY_CTRL); val &= ~BIT(0); diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index a48d9b7d29217..8fee9b330b613 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -3532,7 +3532,6 @@ ptp_ocp_device_init(struct ptp_ocp *bp, struct pci_dev *pdev) return 0; out: - ptp_ocp_dev_release(&bp->dev); put_device(&bp->dev); return err; } diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 351f0fd225b14..f6a95f72af18d 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5543,6 +5543,8 @@ regulator_register(struct device *dev, goto rinse; } device_initialize(&rdev->dev); + dev_set_drvdata(&rdev->dev, rdev); + rdev->dev.class = ®ulator_class; spin_lock_init(&rdev->err_lock); /* @@ -5604,11 +5606,9 @@ regulator_register(struct device *dev, rdev->supply_name = regulator_desc->supply_name; /* register with sysfs */ - rdev->dev.class = ®ulator_class; rdev->dev.parent = config->dev; dev_set_name(&rdev->dev, "regulator.%lu", (unsigned long) atomic_inc_return(®ulator_no)); - dev_set_drvdata(&rdev->dev, rdev); /* set regulator constraints */ if (init_data) diff --git a/drivers/regulator/mt6358-regulator.c b/drivers/regulator/mt6358-regulator.c index 8a5ce990f1bf9..a0441b8086712 100644 --- a/drivers/regulator/mt6358-regulator.c +++ b/drivers/regulator/mt6358-regulator.c @@ -35,19 +35,19 @@ struct mt6358_regulator_info { }; #define MT6358_BUCK(match, vreg, min, max, step, \ - volt_ranges, vosel_mask, _da_vsel_reg, _da_vsel_mask, \ + vosel_mask, _da_vsel_reg, _da_vsel_mask, \ _modeset_reg, _modeset_shift) \ [MT6358_ID_##vreg] = { \ .desc = { \ .name = #vreg, \ .of_match = of_match_ptr(match), \ - .ops = &mt6358_volt_range_ops, \ + .ops = &mt6358_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .id = MT6358_ID_##vreg, \ .owner = THIS_MODULE, \ .n_voltages = ((max) - (min)) / (step) + 1, \ - .linear_ranges = volt_ranges, \ - .n_linear_ranges = ARRAY_SIZE(volt_ranges), \ + .min_uV = (min), \ + .uV_step = (step), \ .vsel_reg = MT6358_BUCK_##vreg##_ELR0, \ .vsel_mask = vosel_mask, \ .enable_reg = MT6358_BUCK_##vreg##_CON0, \ @@ -87,7 +87,7 @@ struct mt6358_regulator_info { } #define MT6358_LDO1(match, vreg, min, max, step, \ - volt_ranges, _da_vsel_reg, _da_vsel_mask, \ + _da_vsel_reg, _da_vsel_mask, \ vosel, vosel_mask) \ [MT6358_ID_##vreg] = { \ .desc = { \ @@ -98,8 +98,8 @@ struct mt6358_regulator_info { .id = MT6358_ID_##vreg, \ .owner = 
THIS_MODULE, \ .n_voltages = ((max) - (min)) / (step) + 1, \ - .linear_ranges = volt_ranges, \ - .n_linear_ranges = ARRAY_SIZE(volt_ranges), \ + .min_uV = (min), \ + .uV_step = (step), \ .vsel_reg = vosel, \ .vsel_mask = vosel_mask, \ .enable_reg = MT6358_LDO_##vreg##_CON0, \ @@ -131,19 +131,19 @@ struct mt6358_regulator_info { } #define MT6366_BUCK(match, vreg, min, max, step, \ - volt_ranges, vosel_mask, _da_vsel_reg, _da_vsel_mask, \ + vosel_mask, _da_vsel_reg, _da_vsel_mask, \ _modeset_reg, _modeset_shift) \ [MT6366_ID_##vreg] = { \ .desc = { \ .name = #vreg, \ .of_match = of_match_ptr(match), \ - .ops = &mt6358_volt_range_ops, \ + .ops = &mt6358_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .id = MT6366_ID_##vreg, \ .owner = THIS_MODULE, \ .n_voltages = ((max) - (min)) / (step) + 1, \ - .linear_ranges = volt_ranges, \ - .n_linear_ranges = ARRAY_SIZE(volt_ranges), \ + .min_uV = (min), \ + .uV_step = (step), \ .vsel_reg = MT6358_BUCK_##vreg##_ELR0, \ .vsel_mask = vosel_mask, \ .enable_reg = MT6358_BUCK_##vreg##_CON0, \ @@ -183,7 +183,7 @@ struct mt6358_regulator_info { } #define MT6366_LDO1(match, vreg, min, max, step, \ - volt_ranges, _da_vsel_reg, _da_vsel_mask, \ + _da_vsel_reg, _da_vsel_mask, \ vosel, vosel_mask) \ [MT6366_ID_##vreg] = { \ .desc = { \ @@ -194,8 +194,8 @@ struct mt6358_regulator_info { .id = MT6366_ID_##vreg, \ .owner = THIS_MODULE, \ .n_voltages = ((max) - (min)) / (step) + 1, \ - .linear_ranges = volt_ranges, \ - .n_linear_ranges = ARRAY_SIZE(volt_ranges), \ + .min_uV = (min), \ + .uV_step = (step), \ .vsel_reg = vosel, \ .vsel_mask = vosel_mask, \ .enable_reg = MT6358_LDO_##vreg##_CON0, \ @@ -226,21 +226,6 @@ struct mt6358_regulator_info { .qi = BIT(15), \ } -static const struct linear_range buck_volt_range1[] = { - REGULATOR_LINEAR_RANGE(500000, 0, 0x7f, 6250), -}; - -static const struct linear_range buck_volt_range2[] = { - REGULATOR_LINEAR_RANGE(500000, 0, 0x7f, 12500), -}; - -static const struct linear_range buck_volt_range3[] = { - REGULATOR_LINEAR_RANGE(500000, 0, 0x3f, 50000), -}; - -static const struct linear_range buck_volt_range4[] = { - REGULATOR_LINEAR_RANGE(1000000, 0, 0x7f, 12500), -}; static const unsigned int vdram2_voltages[] = { 600000, 1800000, @@ -463,9 +448,9 @@ static unsigned int mt6358_regulator_get_mode(struct regulator_dev *rdev) } } -static const struct regulator_ops mt6358_volt_range_ops = { - .list_voltage = regulator_list_voltage_linear_range, - .map_voltage = regulator_map_voltage_linear_range, +static const struct regulator_ops mt6358_buck_ops = { + .list_voltage = regulator_list_voltage_linear, + .map_voltage = regulator_map_voltage_linear, .set_voltage_sel = regulator_set_voltage_sel_regmap, .get_voltage_sel = mt6358_get_buck_voltage_sel, .set_voltage_time_sel = regulator_set_voltage_time_sel, @@ -477,6 +462,18 @@ static const struct regulator_ops mt6358_volt_range_ops = { .get_mode = mt6358_regulator_get_mode, }; +static const struct regulator_ops mt6358_volt_range_ops = { + .list_voltage = regulator_list_voltage_linear, + .map_voltage = regulator_map_voltage_linear, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = mt6358_get_buck_voltage_sel, + .set_voltage_time_sel = regulator_set_voltage_time_sel, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .get_status = mt6358_get_status, +}; + static const struct regulator_ops mt6358_volt_table_ops = { .list_voltage = regulator_list_voltage_table, .map_voltage = regulator_map_voltage_iterate, 
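
Editor's note: the mt6358/mt6366 conversion in this file drops the shared linear_range tables in favour of plain min_uV/uV_step descriptions, so the regulator core's generic linear helpers can do the selector arithmetic directly. Below is a minimal sketch of that arithmetic using the buck_vcore numbers from the tables that follow; the function names and includes are illustrative, not part of the patch.

#include <linux/errno.h>
#include <linux/math.h>

/* Selector -> microvolts, as regulator_list_voltage_linear() computes it. */
static int sketch_list_voltage_linear(unsigned int selector)
{
	const int min_uV = 500000;	/* buck_vcore minimum */
	const int uV_step = 6250;	/* buck_vcore step */
	const unsigned int n_voltages = (1293750 - 500000) / 6250 + 1; /* 128 */

	if (selector >= n_voltages)
		return -EINVAL;
	return min_uV + (int)selector * uV_step;
}

/* Microvolt range -> selector, rounding up so the result never undershoots. */
static int sketch_map_voltage_linear(int req_min_uV, int req_max_uV)
{
	const int min_uV = 500000, uV_step = 6250;
	int sel;

	if (req_min_uV < min_uV)
		req_min_uV = min_uV;
	sel = DIV_ROUND_UP(req_min_uV - min_uV, uV_step);
	if (sel >= 128 || min_uV + sel * uV_step > req_max_uV)
		return -EINVAL;
	return sel;
}

Note that the 128 selectors exactly fill the 0x7f vsel_mask these bucks declare, which is why the table entries can carry only (min, max, step) and still be complete.
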
@@ -500,35 +497,23 @@ static const struct regulator_ops mt6358_volt_fixed_ops = { /* The array is indexed by id(MT6358_ID_XXX) */ static struct mt6358_regulator_info mt6358_regulators[] = { MT6358_BUCK("buck_vdram1", VDRAM1, 500000, 2087500, 12500, - buck_volt_range2, 0x7f, MT6358_BUCK_VDRAM1_DBG0, 0x7f, - MT6358_VDRAM1_ANA_CON0, 8), + 0x7f, MT6358_BUCK_VDRAM1_DBG0, 0x7f, MT6358_VDRAM1_ANA_CON0, 8), MT6358_BUCK("buck_vcore", VCORE, 500000, 1293750, 6250, - buck_volt_range1, 0x7f, MT6358_BUCK_VCORE_DBG0, 0x7f, - MT6358_VCORE_VGPU_ANA_CON0, 1), - MT6358_BUCK("buck_vcore_sshub", VCORE_SSHUB, 500000, 1293750, 6250, - buck_volt_range1, 0x7f, MT6358_BUCK_VCORE_SSHUB_ELR0, 0x7f, - MT6358_VCORE_VGPU_ANA_CON0, 1), + 0x7f, MT6358_BUCK_VCORE_DBG0, 0x7f, MT6358_VCORE_VGPU_ANA_CON0, 1), MT6358_BUCK("buck_vpa", VPA, 500000, 3650000, 50000, - buck_volt_range3, 0x3f, MT6358_BUCK_VPA_DBG0, 0x3f, - MT6358_VPA_ANA_CON0, 3), + 0x3f, MT6358_BUCK_VPA_DBG0, 0x3f, MT6358_VPA_ANA_CON0, 3), MT6358_BUCK("buck_vproc11", VPROC11, 500000, 1293750, 6250, - buck_volt_range1, 0x7f, MT6358_BUCK_VPROC11_DBG0, 0x7f, - MT6358_VPROC_ANA_CON0, 1), + 0x7f, MT6358_BUCK_VPROC11_DBG0, 0x7f, MT6358_VPROC_ANA_CON0, 1), MT6358_BUCK("buck_vproc12", VPROC12, 500000, 1293750, 6250, - buck_volt_range1, 0x7f, MT6358_BUCK_VPROC12_DBG0, 0x7f, - MT6358_VPROC_ANA_CON0, 2), + 0x7f, MT6358_BUCK_VPROC12_DBG0, 0x7f, MT6358_VPROC_ANA_CON0, 2), MT6358_BUCK("buck_vgpu", VGPU, 500000, 1293750, 6250, - buck_volt_range1, 0x7f, MT6358_BUCK_VGPU_ELR0, 0x7f, - MT6358_VCORE_VGPU_ANA_CON0, 2), + 0x7f, MT6358_BUCK_VGPU_ELR0, 0x7f, MT6358_VCORE_VGPU_ANA_CON0, 2), MT6358_BUCK("buck_vs2", VS2, 500000, 2087500, 12500, - buck_volt_range2, 0x7f, MT6358_BUCK_VS2_DBG0, 0x7f, - MT6358_VS2_ANA_CON0, 8), + 0x7f, MT6358_BUCK_VS2_DBG0, 0x7f, MT6358_VS2_ANA_CON0, 8), MT6358_BUCK("buck_vmodem", VMODEM, 500000, 1293750, 6250, - buck_volt_range1, 0x7f, MT6358_BUCK_VMODEM_DBG0, 0x7f, - MT6358_VMODEM_ANA_CON0, 8), + 0x7f, MT6358_BUCK_VMODEM_DBG0, 0x7f, MT6358_VMODEM_ANA_CON0, 8), MT6358_BUCK("buck_vs1", VS1, 1000000, 2587500, 12500, - buck_volt_range4, 0x7f, MT6358_BUCK_VS1_DBG0, 0x7f, - MT6358_VS1_ANA_CON0, 8), + 0x7f, MT6358_BUCK_VS1_DBG0, 0x7f, MT6358_VS1_ANA_CON0, 8), MT6358_REG_FIXED("ldo_vrf12", VRF12, MT6358_LDO_VRF12_CON0, 0, 1200000), MT6358_REG_FIXED("ldo_vio18", VIO18, @@ -582,55 +567,35 @@ static struct mt6358_regulator_info mt6358_regulators[] = { MT6358_LDO("ldo_vsim2", VSIM2, vsim_voltages, vsim_idx, MT6358_LDO_VSIM2_CON0, 0, MT6358_VSIM2_ANA_CON0, 0xf00), MT6358_LDO1("ldo_vsram_proc11", VSRAM_PROC11, 500000, 1293750, 6250, - buck_volt_range1, MT6358_LDO_VSRAM_PROC11_DBG0, 0x7f00, - MT6358_LDO_VSRAM_CON0, 0x7f), + MT6358_LDO_VSRAM_PROC11_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON0, 0x7f), MT6358_LDO1("ldo_vsram_others", VSRAM_OTHERS, 500000, 1293750, 6250, - buck_volt_range1, MT6358_LDO_VSRAM_OTHERS_DBG0, 0x7f00, - MT6358_LDO_VSRAM_CON2, 0x7f), - MT6358_LDO1("ldo_vsram_others_sshub", VSRAM_OTHERS_SSHUB, 500000, - 1293750, 6250, buck_volt_range1, - MT6358_LDO_VSRAM_OTHERS_SSHUB_CON1, 0x7f, - MT6358_LDO_VSRAM_OTHERS_SSHUB_CON1, 0x7f), + MT6358_LDO_VSRAM_OTHERS_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON2, 0x7f), MT6358_LDO1("ldo_vsram_gpu", VSRAM_GPU, 500000, 1293750, 6250, - buck_volt_range1, MT6358_LDO_VSRAM_GPU_DBG0, 0x7f00, - MT6358_LDO_VSRAM_CON3, 0x7f), + MT6358_LDO_VSRAM_GPU_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON3, 0x7f), MT6358_LDO1("ldo_vsram_proc12", VSRAM_PROC12, 500000, 1293750, 6250, - buck_volt_range1, MT6358_LDO_VSRAM_PROC12_DBG0, 0x7f00, - MT6358_LDO_VSRAM_CON1, 0x7f), 
+ MT6358_LDO_VSRAM_PROC12_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON1, 0x7f), }; /* The array is indexed by id(MT6366_ID_XXX) */ static struct mt6358_regulator_info mt6366_regulators[] = { MT6366_BUCK("buck_vdram1", VDRAM1, 500000, 2087500, 12500, - buck_volt_range2, 0x7f, MT6358_BUCK_VDRAM1_DBG0, 0x7f, - MT6358_VDRAM1_ANA_CON0, 8), + 0x7f, MT6358_BUCK_VDRAM1_DBG0, 0x7f, MT6358_VDRAM1_ANA_CON0, 8), MT6366_BUCK("buck_vcore", VCORE, 500000, 1293750, 6250, - buck_volt_range1, 0x7f, MT6358_BUCK_VCORE_DBG0, 0x7f, - MT6358_VCORE_VGPU_ANA_CON0, 1), - MT6366_BUCK("buck_vcore_sshub", VCORE_SSHUB, 500000, 1293750, 6250, - buck_volt_range1, 0x7f, MT6358_BUCK_VCORE_SSHUB_ELR0, 0x7f, - MT6358_VCORE_VGPU_ANA_CON0, 1), + 0x7f, MT6358_BUCK_VCORE_DBG0, 0x7f, MT6358_VCORE_VGPU_ANA_CON0, 1), MT6366_BUCK("buck_vpa", VPA, 500000, 3650000, 50000, - buck_volt_range3, 0x3f, MT6358_BUCK_VPA_DBG0, 0x3f, - MT6358_VPA_ANA_CON0, 3), + 0x3f, MT6358_BUCK_VPA_DBG0, 0x3f, MT6358_VPA_ANA_CON0, 3), MT6366_BUCK("buck_vproc11", VPROC11, 500000, 1293750, 6250, - buck_volt_range1, 0x7f, MT6358_BUCK_VPROC11_DBG0, 0x7f, - MT6358_VPROC_ANA_CON0, 1), + 0x7f, MT6358_BUCK_VPROC11_DBG0, 0x7f, MT6358_VPROC_ANA_CON0, 1), MT6366_BUCK("buck_vproc12", VPROC12, 500000, 1293750, 6250, - buck_volt_range1, 0x7f, MT6358_BUCK_VPROC12_DBG0, 0x7f, - MT6358_VPROC_ANA_CON0, 2), + 0x7f, MT6358_BUCK_VPROC12_DBG0, 0x7f, MT6358_VPROC_ANA_CON0, 2), MT6366_BUCK("buck_vgpu", VGPU, 500000, 1293750, 6250, - buck_volt_range1, 0x7f, MT6358_BUCK_VGPU_ELR0, 0x7f, - MT6358_VCORE_VGPU_ANA_CON0, 2), + 0x7f, MT6358_BUCK_VGPU_ELR0, 0x7f, MT6358_VCORE_VGPU_ANA_CON0, 2), MT6366_BUCK("buck_vs2", VS2, 500000, 2087500, 12500, - buck_volt_range2, 0x7f, MT6358_BUCK_VS2_DBG0, 0x7f, - MT6358_VS2_ANA_CON0, 8), + 0x7f, MT6358_BUCK_VS2_DBG0, 0x7f, MT6358_VS2_ANA_CON0, 8), MT6366_BUCK("buck_vmodem", VMODEM, 500000, 1293750, 6250, - buck_volt_range1, 0x7f, MT6358_BUCK_VMODEM_DBG0, 0x7f, - MT6358_VMODEM_ANA_CON0, 8), + 0x7f, MT6358_BUCK_VMODEM_DBG0, 0x7f, MT6358_VMODEM_ANA_CON0, 8), MT6366_BUCK("buck_vs1", VS1, 1000000, 2587500, 12500, - buck_volt_range4, 0x7f, MT6358_BUCK_VS1_DBG0, 0x7f, - MT6358_VS1_ANA_CON0, 8), + 0x7f, MT6358_BUCK_VS1_DBG0, 0x7f, MT6358_VS1_ANA_CON0, 8), MT6366_REG_FIXED("ldo_vrf12", VRF12, MT6358_LDO_VRF12_CON0, 0, 1200000), MT6366_REG_FIXED("ldo_vio18", VIO18, @@ -673,21 +638,13 @@ static struct mt6358_regulator_info mt6366_regulators[] = { MT6366_LDO("ldo_vsim2", VSIM2, vsim_voltages, vsim_idx, MT6358_LDO_VSIM2_CON0, 0, MT6358_VSIM2_ANA_CON0, 0xf00), MT6366_LDO1("ldo_vsram_proc11", VSRAM_PROC11, 500000, 1293750, 6250, - buck_volt_range1, MT6358_LDO_VSRAM_PROC11_DBG0, 0x7f00, - MT6358_LDO_VSRAM_CON0, 0x7f), + MT6358_LDO_VSRAM_PROC11_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON0, 0x7f), MT6366_LDO1("ldo_vsram_others", VSRAM_OTHERS, 500000, 1293750, 6250, - buck_volt_range1, MT6358_LDO_VSRAM_OTHERS_DBG0, 0x7f00, - MT6358_LDO_VSRAM_CON2, 0x7f), - MT6366_LDO1("ldo_vsram_others_sshub", VSRAM_OTHERS_SSHUB, 500000, - 1293750, 6250, buck_volt_range1, - MT6358_LDO_VSRAM_OTHERS_SSHUB_CON1, 0x7f, - MT6358_LDO_VSRAM_OTHERS_SSHUB_CON1, 0x7f), + MT6358_LDO_VSRAM_OTHERS_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON2, 0x7f), MT6366_LDO1("ldo_vsram_gpu", VSRAM_GPU, 500000, 1293750, 6250, - buck_volt_range1, MT6358_LDO_VSRAM_GPU_DBG0, 0x7f00, - MT6358_LDO_VSRAM_CON3, 0x7f), + MT6358_LDO_VSRAM_GPU_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON3, 0x7f), MT6366_LDO1("ldo_vsram_proc12", VSRAM_PROC12, 500000, 1293750, 6250, - buck_volt_range1, MT6358_LDO_VSRAM_PROC12_DBG0, 0x7f00, - MT6358_LDO_VSRAM_CON1, 0x7f), + 
MT6358_LDO_VSRAM_PROC12_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON1, 0x7f), }; static int mt6358_regulator_probe(struct platform_device *pdev) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index df782646e856f..ab2f35bc294da 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -518,12 +518,12 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn, if (port) { put_device(&port->dev); retval = -EEXIST; - goto err_out; + goto err_put; } port = kzalloc(sizeof(struct zfcp_port), GFP_KERNEL); if (!port) - goto err_out; + goto err_put; rwlock_init(&port->unit_list_lock); INIT_LIST_HEAD(&port->unit_list); @@ -546,7 +546,7 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn, if (dev_set_name(&port->dev, "0x%016llx", (unsigned long long)wwpn)) { kfree(port); - goto err_out; + goto err_put; } retval = -EINVAL; @@ -563,7 +563,8 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn, return port; -err_out: +err_put: zfcp_ccw_adapter_put(adapter); +err_out: return ERR_PTR(retval); } diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 3f062e4013ab6..013a9a334972e 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1451,7 +1451,7 @@ retry_next: #endif break; } - scsi_rescan_device(&device->sdev_gendev); + scsi_rescan_device(device); break; default: diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c index 05d3ce9b72dba..c4acf65379d20 100644 --- a/drivers/scsi/mvumi.c +++ b/drivers/scsi/mvumi.c @@ -1500,7 +1500,7 @@ static void mvumi_rescan_devices(struct mvumi_hba *mhba, int id) sdev = scsi_device_lookup(mhba->shost, 0, id, 0); if (sdev) { - scsi_rescan_device(&sdev->sdev_gendev); + scsi_rescan_device(sdev); scsi_device_put(sdev); } } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index fb6e9a7a7f58b..d25e1c2472538 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2445,7 +2445,7 @@ static void scsi_evt_emit(struct scsi_device *sdev, struct scsi_event *evt) envp[idx++] = "SDEV_MEDIA_CHANGE=1"; break; case SDEV_EVT_INQUIRY_CHANGE_REPORTED: - scsi_rescan_device(&sdev->sdev_gendev); + scsi_rescan_device(sdev); envp[idx++] = "SDEV_UA=INQUIRY_DATA_HAS_CHANGED"; break; case SDEV_EVT_CAPACITY_CHANGE_REPORTED: diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index c52de9a973e46..b14545acb40f5 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -132,7 +132,6 @@ extern int scsi_complete_async_scans(void); extern int scsi_scan_host_selected(struct Scsi_Host *, unsigned int, unsigned int, u64, enum scsi_scan_mode); extern void scsi_forget_host(struct Scsi_Host *); -extern void scsi_rescan_device(struct device *); /* scsi_sysctl.c */ #ifdef CONFIG_SYSCTL diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index d12f2dcb4040a..ed26c52ed8474 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1611,12 +1611,24 @@ int scsi_add_device(struct Scsi_Host *host, uint channel, } EXPORT_SYMBOL(scsi_add_device); -void scsi_rescan_device(struct device *dev) +int scsi_rescan_device(struct scsi_device *sdev) { - struct scsi_device *sdev = to_scsi_device(dev); + struct device *dev = &sdev->sdev_gendev; + int ret = 0; device_lock(dev); + /* + * Bail out if the device is not running. Otherwise, the rescan may + * block waiting for commands to be executed, with us holding the + * device lock. 
This can result in a potential deadlock in the power + * management core code when system resume is on-going. + */ + if (sdev->sdev_state != SDEV_RUNNING) { + ret = -EWOULDBLOCK; + goto unlock; + } + scsi_attach_vpd(sdev); if (sdev->handler && sdev->handler->rescan) @@ -1629,7 +1641,11 @@ void scsi_rescan_device(struct device *dev) drv->rescan(dev); module_put(dev->driver->owner); } + +unlock: device_unlock(dev); + + return ret; } EXPORT_SYMBOL(scsi_rescan_device); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index cac7c902cf70a..1f531063d6331 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -762,7 +762,7 @@ static ssize_t store_rescan_field (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - scsi_rescan_device(dev); + scsi_rescan_device(to_scsi_device(dev)); return count; } static DEVICE_ATTR(rescan, S_IWUSR, NULL, store_rescan_field); @@ -855,7 +855,7 @@ store_state_field(struct device *dev, struct device_attribute *attr, * waiting for pending I/O to finish. */ blk_mq_run_hw_queues(sdev->request_queue, true); - scsi_rescan_device(dev); + scsi_rescan_device(sdev); } return ret == 0 ? count : -EINVAL; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index e934779bf05c8..30184f7b762c1 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -213,18 +213,32 @@ cache_type_store(struct device *dev, struct device_attribute *attr, } static ssize_t -manage_start_stop_show(struct device *dev, struct device_attribute *attr, - char *buf) +manage_start_stop_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; - return sprintf(buf, "%u\n", sdp->manage_start_stop); + return sysfs_emit(buf, "%u\n", + sdp->manage_system_start_stop && + sdp->manage_runtime_start_stop); } +static DEVICE_ATTR_RO(manage_start_stop); static ssize_t -manage_start_stop_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +manage_system_start_stop_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct scsi_disk *sdkp = to_scsi_disk(dev); + struct scsi_device *sdp = sdkp->device; + + return sysfs_emit(buf, "%u\n", sdp->manage_system_start_stop); +} + +static ssize_t +manage_system_start_stop_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; @@ -236,11 +250,42 @@ manage_start_stop_store(struct device *dev, struct device_attribute *attr, if (kstrtobool(buf, &v)) return -EINVAL; - sdp->manage_start_stop = v; + sdp->manage_system_start_stop = v; return count; } -static DEVICE_ATTR_RW(manage_start_stop); +static DEVICE_ATTR_RW(manage_system_start_stop); + +static ssize_t +manage_runtime_start_stop_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct scsi_disk *sdkp = to_scsi_disk(dev); + struct scsi_device *sdp = sdkp->device; + + return sysfs_emit(buf, "%u\n", sdp->manage_runtime_start_stop); +} + +static ssize_t +manage_runtime_start_stop_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct scsi_disk *sdkp = to_scsi_disk(dev); + struct scsi_device *sdp = sdkp->device; + bool v; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (kstrtobool(buf, &v)) + return -EINVAL; + + sdp->manage_runtime_start_stop = v; + + return count; +} +static DEVICE_ATTR_RW(manage_runtime_start_stop); 
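
Editor's note: the new split knobs behave like any other boolean sysfs attribute; the store side goes through kstrtobool(), so "0"/"1", "y"/"n" and "on"/"off" all parse, and reads go through sysfs_emit(). A hedged userspace sketch of enabling runtime-only power management for one disk follows; the scsi_disk sysfs path and the 0:0:0:0 address are assumptions for illustration, not taken from this patch.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Illustrative path; the real h:c:t:l address depends on the system. */
	const char *attr =
		"/sys/class/scsi_disk/0:0:0:0/manage_runtime_start_stop";
	int fd = open(attr, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* kstrtobool() in the kernel parses "1" as true. */
	if (write(fd, "1", 1) != 1)
		perror("write");
	close(fd);
	return 0;
}

With only the runtime knob set, sd_suspend_common() spins the disk down on runtime PM transitions but leaves system suspend alone, per the sd_do_start_stop() helper introduced further down in this hunk.
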
static ssize_t allow_restart_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -572,6 +617,8 @@ static struct attribute *sd_disk_attrs[] = { &dev_attr_FUA.attr, &dev_attr_allow_restart.attr, &dev_attr_manage_start_stop.attr, + &dev_attr_manage_system_start_stop.attr, + &dev_attr_manage_runtime_start_stop.attr, &dev_attr_protection_type.attr, &dev_attr_protection_mode.attr, &dev_attr_app_tag_own.attr, @@ -3579,7 +3626,8 @@ static int sd_remove(struct device *dev) device_del(&sdkp->disk_dev); del_gendisk(sdkp->disk); - sd_shutdown(dev); + if (!sdkp->suspended) + sd_shutdown(dev); put_disk(sdkp->disk); return 0; @@ -3652,13 +3700,20 @@ static void sd_shutdown(struct device *dev) sd_sync_cache(sdkp, NULL); } - if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) { + if (system_state != SYSTEM_RESTART && + sdkp->device->manage_system_start_stop) { sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); sd_start_stop_device(sdkp, 0); } } -static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) +static inline bool sd_do_start_stop(struct scsi_device *sdev, bool runtime) +{ + return (sdev->manage_system_start_stop && !runtime) || + (sdev->manage_runtime_start_stop && runtime); +} + +static int sd_suspend_common(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); struct scsi_sense_hdr sshdr; @@ -3690,15 +3745,18 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) } } - if (sdkp->device->manage_start_stop) { + if (sd_do_start_stop(sdkp->device, runtime)) { if (!sdkp->device->silence_suspend) sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); /* an error is not worth aborting a system sleep */ ret = sd_start_stop_device(sdkp, 0); - if (ignore_stop_errors) + if (!runtime) ret = 0; } + if (!ret) + sdkp->suspended = true; + return ret; } @@ -3707,29 +3765,37 @@ static int sd_suspend_system(struct device *dev) if (pm_runtime_suspended(dev)) return 0; - return sd_suspend_common(dev, true); + return sd_suspend_common(dev, false); } static int sd_suspend_runtime(struct device *dev) { - return sd_suspend_common(dev, false); + return sd_suspend_common(dev, true); } -static int sd_resume(struct device *dev) +static int sd_resume(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); - int ret; + int ret = 0; if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ return 0; - if (!sdkp->device->manage_start_stop) + if (!sd_do_start_stop(sdkp->device, runtime)) { + sdkp->suspended = false; return 0; + } - sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); - ret = sd_start_stop_device(sdkp, 1); - if (!ret) + if (!sdkp->device->no_start_on_resume) { + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); + ret = sd_start_stop_device(sdkp, 1); + } + + if (!ret) { opal_unlock_from_suspend(sdkp->opal_dev); + sdkp->suspended = false; + } + return ret; } @@ -3738,7 +3804,7 @@ static int sd_resume_system(struct device *dev) if (pm_runtime_suspended(dev)) return 0; - return sd_resume(dev); + return sd_resume(dev, false); } static int sd_resume_runtime(struct device *dev) @@ -3762,7 +3828,7 @@ static int sd_resume_runtime(struct device *dev) "Failed to clear sense data\n"); } - return sd_resume(dev); + return sd_resume(dev, true); } /** diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 5eea762f84d18..409dda5350d10 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -131,6 +131,7 @@ struct scsi_disk { u8 provisioning_mode; u8 zeroing_mode; u8 nr_actuators; /* Number of actuators 
*/ + bool suspended; /* Disk is suspended (stopped) */ unsigned ATO : 1; /* state of disk ATO bit */ unsigned cache_override : 1; /* temp override of WCE,RCD */ unsigned WCE : 1; /* state of disk WCE bit */ diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 9f0f69c1ed665..47d487729635c 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -2278,7 +2278,7 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info, device->advertised_queue_depth = device->queue_depth; scsi_change_queue_depth(device->sdev, device->advertised_queue_depth); if (device->rescan) { - scsi_rescan_device(&device->sdev->sdev_gendev); + scsi_rescan_device(device->sdev); device->rescan = false; } } diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 7a1dc5c7c49ee..c2d981d5a2dd5 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -471,7 +471,7 @@ static void storvsc_device_scan(struct work_struct *work) sdev = scsi_device_lookup(wrk->host, 0, wrk->tgt_id, wrk->lun); if (!sdev) goto done; - scsi_rescan_device(&sdev->sdev_gendev); + scsi_rescan_device(sdev); scsi_device_put(sdev); done: diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 2a79ab16134b1..3f8c553f3d91e 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -325,7 +325,7 @@ static void virtscsi_handle_param_change(struct virtio_scsi *vscsi, /* Handle "Parameters changed", "Mode parameters changed", and "Capacity data has changed". */ if (asc == 0x2a && (ascq == 0x00 || ascq == 0x01 || ascq == 0x09)) - scsi_rescan_device(&sdev->sdev_gendev); + scsi_rescan_device(sdev); scsi_device_put(sdev); } diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c index c760aac070e54..3b56d5e7080e1 100644 --- a/drivers/spi/spi-zynqmp-gqspi.c +++ b/drivers/spi/spi-zynqmp-gqspi.c @@ -1218,9 +1218,9 @@ static int zynqmp_qspi_probe(struct platform_device *pdev) return 0; clk_dis_all: - pm_runtime_put_sync(&pdev->dev); - pm_runtime_set_suspended(&pdev->dev); pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); clk_disable_unprepare(xqspi->refclk); clk_dis_pclk: clk_disable_unprepare(xqspi->pclk); @@ -1244,11 +1244,15 @@ static int zynqmp_qspi_remove(struct platform_device *pdev) { struct zynqmp_qspi *xqspi = platform_get_drvdata(pdev); + pm_runtime_get_sync(&pdev->dev); + zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0); + + pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); clk_disable_unprepare(xqspi->refclk); clk_disable_unprepare(xqspi->pclk); - pm_runtime_set_suspended(&pdev->dev); - pm_runtime_disable(&pdev->dev); return 0; } diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index d21f88de197c7..301fe376a1206 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -883,7 +883,6 @@ sector_t target_to_linux_sector(struct se_device *dev, sector_t lb) EXPORT_SYMBOL(target_to_linux_sector); struct devices_idr_iter { - struct config_item *prev_item; int (*fn)(struct se_device *dev, void *data); void *data; }; @@ -893,11 +892,9 @@ static int target_devices_idr_iter(int id, void *p, void *data) { struct devices_idr_iter *iter = data; struct se_device *dev = p; + struct config_item *item; int ret; - config_item_put(iter->prev_item); - iter->prev_item = NULL; - /* * We add the device early to 
the idr, so it can be used * by backend modules during configuration. We do not want @@ -907,12 +904,13 @@ static int target_devices_idr_iter(int id, void *p, void *data) if (!target_dev_configured(dev)) return 0; - iter->prev_item = config_item_get_unless_zero(&dev->dev_group.cg_item); - if (!iter->prev_item) + item = config_item_get_unless_zero(&dev->dev_group.cg_item); + if (!item) return 0; mutex_unlock(&device_mutex); ret = iter->fn(dev, iter->data); + config_item_put(item); mutex_lock(&device_mutex); return ret; @@ -935,7 +933,6 @@ int target_for_each_device(int (*fn)(struct se_device *dev, void *data), mutex_lock(&device_mutex); ret = idr_for_each(&devices_idr, target_devices_idr_iter, &iter); mutex_unlock(&device_mutex); - config_item_put(iter.prev_item); return ret; } diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index 33eb941fcf154..10bfc5f1c50d5 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -123,8 +123,18 @@ static inline ssize_t vringh_iov_xfer(struct vringh *vrh, done += partlen; len -= partlen; ptr += partlen; + iov->consumed += partlen; + iov->iov[iov->i].iov_len -= partlen; + iov->iov[iov->i].iov_base += partlen; - vringh_kiov_advance(iov, partlen); + if (!iov->iov[iov->i].iov_len) { + /* Fix up old iov element then increment. */ + iov->iov[iov->i].iov_len = iov->consumed; + iov->iov[iov->i].iov_base -= iov->consumed; + + iov->consumed = 0; + iov->i++; + } } return done; } diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index c443f04aaad77..80b46de14f413 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -96,6 +97,7 @@ enum xen_irq_type { struct irq_info { struct list_head list; struct list_head eoi_list; + struct rcu_work rwork; short refcnt; u8 spurious_cnt; u8 is_accounted; @@ -145,23 +147,13 @@ const struct evtchn_ops *evtchn_ops; */ static DEFINE_MUTEX(irq_mapping_update_lock); -/* - * Lock protecting event handling loop against removing event channels. - * Adding of event channels is no issue as the associated IRQ becomes active - * only after everything is setup (before request_[threaded_]irq() the handler - * can't be entered for an event, as the event channel will be unmasked only - * then). - */ -static DEFINE_RWLOCK(evtchn_rwlock); - /* * Lock hierarchy: * * irq_mapping_update_lock - * evtchn_rwlock - * IRQ-desc lock - * percpu eoi_list_lock - * irq_info->lock + * IRQ-desc lock + * percpu eoi_list_lock + * irq_info->lock */ static LIST_HEAD(xen_irq_list_head); @@ -305,6 +297,22 @@ static void channels_on_cpu_inc(struct irq_info *info) info->is_accounted = 1; } +static void delayed_free_irq(struct work_struct *work) +{ + struct irq_info *info = container_of(to_rcu_work(work), struct irq_info, + rwork); + unsigned int irq = info->irq; + + /* Remove the info pointer only now, with no potential users left. */ + set_info_for_irq(irq, NULL); + + kfree(info); + + /* Legacy IRQ descriptors are managed by the arch. */ + if (irq >= nr_legacy_irqs()) + irq_free_desc(irq); +} + /* Constructors for packed IRQ information. 
*/ static int xen_irq_info_common_setup(struct irq_info *info, unsigned irq, @@ -667,33 +675,36 @@ static void xen_irq_lateeoi_worker(struct work_struct *work) eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed); - read_lock_irqsave(&evtchn_rwlock, flags); + rcu_read_lock(); while (true) { - spin_lock(&eoi->eoi_list_lock); + spin_lock_irqsave(&eoi->eoi_list_lock, flags); info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, eoi_list); - if (info == NULL || now < info->eoi_time) { - spin_unlock(&eoi->eoi_list_lock); + if (info == NULL) + break; + + if (now < info->eoi_time) { + mod_delayed_work_on(info->eoi_cpu, system_wq, + &eoi->delayed, + info->eoi_time - now); break; } list_del_init(&info->eoi_list); - spin_unlock(&eoi->eoi_list_lock); + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); info->eoi_time = 0; xen_irq_lateeoi_locked(info, false); } - if (info) - mod_delayed_work_on(info->eoi_cpu, system_wq, - &eoi->delayed, info->eoi_time - now); + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); - read_unlock_irqrestore(&evtchn_rwlock, flags); + rcu_read_unlock(); } static void xen_cpu_init_eoi(unsigned int cpu) @@ -708,16 +719,15 @@ static void xen_cpu_init_eoi(unsigned int cpu) void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) { struct irq_info *info; - unsigned long flags; - read_lock_irqsave(&evtchn_rwlock, flags); + rcu_read_lock(); info = info_for_irq(irq); if (info) xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS); - read_unlock_irqrestore(&evtchn_rwlock, flags); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(xen_irq_lateeoi); @@ -731,6 +741,7 @@ static void xen_irq_init(unsigned irq) info->type = IRQT_UNBOUND; info->refcnt = -1; + INIT_RCU_WORK(&info->rwork, delayed_free_irq); set_info_for_irq(irq, info); /* @@ -788,31 +799,18 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) static void xen_free_irq(unsigned irq) { struct irq_info *info = info_for_irq(irq); - unsigned long flags; if (WARN_ON(!info)) return; - write_lock_irqsave(&evtchn_rwlock, flags); - if (!list_empty(&info->eoi_list)) lateeoi_list_del(info); list_del(&info->list); - set_info_for_irq(irq, NULL); - WARN_ON(info->refcnt > 0); - write_unlock_irqrestore(&evtchn_rwlock, flags); - - kfree(info); - - /* Legacy IRQ descriptors are managed by the arch. */ - if (irq < nr_legacy_irqs()) - return; - - irq_free_desc(irq); + queue_rcu_work(system_wq, &info->rwork); } static void xen_evtchn_close(evtchn_port_t port) @@ -1716,7 +1714,14 @@ static void __xen_evtchn_do_upcall(void) int cpu = smp_processor_id(); struct evtchn_loop_ctrl ctrl = { 0 }; - read_lock(&evtchn_rwlock); + /* + * When closing an event channel the associated IRQ must not be freed + * until all cpus have left the event handling loop. This is ensured + * by taking the rcu_read_lock() while handling events, as freeing of + * the IRQ is handled via queue_rcu_work() _after_ closing the event + * channel. 
+ */ + rcu_read_lock(); do { vcpu_info->evtchn_upcall_pending = 0; @@ -1729,7 +1734,7 @@ static void __xen_evtchn_do_upcall(void) } while (vcpu_info->evtchn_upcall_pending); - read_unlock(&evtchn_rwlock); + rcu_read_unlock(); /* * Increment irq_epoch only now to defer EOIs only for diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3bcef0c4d6fc4..27d06bb5e5c05 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -28,6 +28,7 @@ #include #include #include +#include #include "extent-io-tree.h" #include "extent_io.h" #include "extent_map.h" @@ -3238,11 +3239,11 @@ static inline void btrfs_clear_sb_rdonly(struct super_block *sb) /* root-item.c */ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, - u64 ref_id, u64 dirid, u64 sequence, const char *name, - int name_len); + u64 ref_id, u64 dirid, u64 sequence, + const struct fscrypt_str *name); int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, - u64 ref_id, u64 dirid, u64 *sequence, const char *name, - int name_len); + u64 ref_id, u64 dirid, u64 *sequence, + const struct fscrypt_str *name); int btrfs_del_root(struct btrfs_trans_handle *trans, const struct btrfs_key *key); int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -3271,25 +3272,23 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info); /* dir-item.c */ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, - const char *name, int name_len); -int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name, - int name_len, struct btrfs_inode *dir, + const struct fscrypt_str *name); +int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, + const struct fscrypt_str *name, struct btrfs_inode *dir, struct btrfs_key *location, u8 type, u64 index); struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, - const char *name, int name_len, - int mod); + const struct fscrypt_str *name, int mod); struct btrfs_dir_item * btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, - u64 index, const char *name, int name_len, - int mod); + u64 index, const struct fscrypt_str *name, int mod); struct btrfs_dir_item * btrfs_search_dir_index_item(struct btrfs_root *root, struct btrfs_path *path, u64 dirid, - const char *name, int name_len); + const struct fscrypt_str *name); int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -3370,10 +3369,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index); int btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_inode *dir, struct btrfs_inode *inode, - const char *name, int name_len); + const struct fscrypt_str *name); int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_inode *parent_inode, struct btrfs_inode *inode, - const char *name, int name_len, int add_backref, u64 index); + const struct fscrypt_str *name, int add_backref, u64 index); int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry); int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len, int front); @@ -3398,6 +3397,7 @@ struct btrfs_new_inode_args { */ struct posix_acl *default_acl; struct posix_acl *acl; + struct fscrypt_name fname; }; int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args, unsigned int *trans_num_items); diff 
--git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 72fb2c518a2b4..fdab48c1abb8a 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -103,8 +103,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, * to use for the second index (if one is created). * Will return 0 or -ENOMEM */ -int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name, - int name_len, struct btrfs_inode *dir, +int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, + const struct fscrypt_str *name, struct btrfs_inode *dir, struct btrfs_key *location, u8 type, u64 index) { int ret = 0; @@ -120,7 +120,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name, key.objectid = btrfs_ino(dir); key.type = BTRFS_DIR_ITEM_KEY; - key.offset = btrfs_name_hash(name, name_len); + key.offset = btrfs_name_hash(name->name, name->len); path = btrfs_alloc_path(); if (!path) @@ -128,9 +128,9 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name, btrfs_cpu_key_to_disk(&disk_key, location); - data_size = sizeof(*dir_item) + name_len; + data_size = sizeof(*dir_item) + name->len; dir_item = insert_with_overflow(trans, root, path, &key, data_size, - name, name_len); + name->name, name->len); if (IS_ERR(dir_item)) { ret = PTR_ERR(dir_item); if (ret == -EEXIST) @@ -142,11 +142,11 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name, btrfs_set_dir_item_key(leaf, dir_item, &disk_key); btrfs_set_dir_type(leaf, dir_item, type); btrfs_set_dir_data_len(leaf, dir_item, 0); - btrfs_set_dir_name_len(leaf, dir_item, name_len); + btrfs_set_dir_name_len(leaf, dir_item, name->len); btrfs_set_dir_transid(leaf, dir_item, trans->transid); name_ptr = (unsigned long)(dir_item + 1); - write_extent_buffer(leaf, name, name_ptr, name_len); + write_extent_buffer(leaf, name->name, name_ptr, name->len); btrfs_mark_buffer_dirty(leaf); second_insert: @@ -157,7 +157,7 @@ second_insert: } btrfs_release_path(path); - ret2 = btrfs_insert_delayed_dir_index(trans, name, name_len, dir, + ret2 = btrfs_insert_delayed_dir_index(trans, name->name, name->len, dir, &disk_key, type, index); out_free: btrfs_free_path(path); @@ -206,7 +206,7 @@ static struct btrfs_dir_item *btrfs_lookup_match_dir( struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, - const char *name, int name_len, + const struct fscrypt_str *name, int mod) { struct btrfs_key key; @@ -214,9 +214,10 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, key.objectid = dir; key.type = BTRFS_DIR_ITEM_KEY; - key.offset = btrfs_name_hash(name, name_len); + key.offset = btrfs_name_hash(name->name, name->len); - di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod); + di = btrfs_lookup_match_dir(trans, root, path, &key, name->name, + name->len, mod); if (IS_ERR(di) && PTR_ERR(di) == -ENOENT) return NULL; @@ -224,7 +225,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, } int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, - const char *name, int name_len) + const struct fscrypt_str *name) { int ret; struct btrfs_key key; @@ -240,9 +241,10 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, key.objectid = dir; key.type = BTRFS_DIR_ITEM_KEY; - key.offset = btrfs_name_hash(name, name_len); + key.offset = btrfs_name_hash(name->name, name->len); - di = btrfs_lookup_match_dir(NULL, root, path, &key, name, 
name_len, 0); + di = btrfs_lookup_match_dir(NULL, root, path, &key, name->name, + name->len, 0); if (IS_ERR(di)) { ret = PTR_ERR(di); /* Nothing found, we're safe */ @@ -262,11 +264,8 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, goto out; } - /* - * see if there is room in the item to insert this - * name - */ - data_size = sizeof(*di) + name_len; + /* See if there is room in the item to insert this name. */ + data_size = sizeof(*di) + name->len; leaf = path->nodes[0]; slot = path->slots[0]; if (data_size + btrfs_item_size(leaf, slot) + @@ -303,8 +302,7 @@ struct btrfs_dir_item * btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, - u64 index, const char *name, int name_len, - int mod) + u64 index, const struct fscrypt_str *name, int mod) { struct btrfs_dir_item *di; struct btrfs_key key; @@ -313,7 +311,8 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, key.type = BTRFS_DIR_INDEX_KEY; key.offset = index; - di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod); + di = btrfs_lookup_match_dir(trans, root, path, &key, name->name, + name->len, mod); if (di == ERR_PTR(-ENOENT)) return NULL; @@ -321,9 +320,8 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, } struct btrfs_dir_item * -btrfs_search_dir_index_item(struct btrfs_root *root, - struct btrfs_path *path, u64 dirid, - const char *name, int name_len) +btrfs_search_dir_index_item(struct btrfs_root *root, struct btrfs_path *path, + u64 dirid, const struct fscrypt_str *name) { struct btrfs_dir_item *di; struct btrfs_key key; @@ -338,7 +336,7 @@ btrfs_search_dir_index_item(struct btrfs_root *root, break; di = btrfs_match_dir_item_name(root->fs_info, path, - name, name_len); + name->name, name->len); if (di) return di; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 77202addead83..0a46fff3dd067 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1458,8 +1458,13 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_flags & IOCB_NOWAIT) ilock_flags |= BTRFS_ILOCK_TRY; - /* If the write DIO is within EOF, use a shared lock */ - if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode)) + /* + * If the write DIO is within EOF, use a shared lock and also only if + * security bits will likely not be dropped by file_remove_privs() called + * from btrfs_write_check(). Either will need to be rechecked after the + * lock was acquired. + */ + if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode) && IS_NOSEC(inode)) ilock_flags |= BTRFS_ILOCK_SHARED; relock: @@ -1467,6 +1472,13 @@ relock: if (err < 0) return err; + /* Shared lock cannot be used with security bits set. 
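+ * (Editorial note, illustration only: this is the usual
+ * check-lock-recheck pattern. The unlocked IS_NOSEC() test above is
+ * only a hint; once the inode lock is held the condition is
+ * re-evaluated here, and if it no longer holds the lock is dropped,
+ * BTRFS_ILOCK_SHARED is cleared and the code jumps back to relock to
+ * retry with the exclusive lock.)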
*/ + if ((ilock_flags & BTRFS_ILOCK_SHARED) && !IS_NOSEC(inode)) { + btrfs_inode_unlock(inode, ilock_flags); + ilock_flags &= ~BTRFS_ILOCK_SHARED; + goto relock; + } + err = generic_write_checks(iocb, from); if (err <= 0) { btrfs_inode_unlock(inode, ilock_flags); diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 0eeb5ea878948..5add022d3534f 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -10,8 +10,8 @@ #include "print-tree.h" struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf, - int slot, const char *name, - int name_len) + int slot, + const struct fscrypt_str *name) { struct btrfs_inode_ref *ref; unsigned long ptr; @@ -27,9 +27,10 @@ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf, len = btrfs_inode_ref_name_len(leaf, ref); name_ptr = (unsigned long)(ref + 1); cur_offset += len + sizeof(*ref); - if (len != name_len) + if (len != name->len) continue; - if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) + if (memcmp_extent_buffer(leaf, name->name, name_ptr, + name->len) == 0) return ref; } return NULL; @@ -37,7 +38,7 @@ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf, struct btrfs_inode_extref *btrfs_find_name_in_ext_backref( struct extent_buffer *leaf, int slot, u64 ref_objectid, - const char *name, int name_len) + const struct fscrypt_str *name) { struct btrfs_inode_extref *extref; unsigned long ptr; @@ -60,9 +61,10 @@ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref( name_ptr = (unsigned long)(&extref->name); ref_name_len = btrfs_inode_extref_name_len(leaf, extref); - if (ref_name_len == name_len && + if (ref_name_len == name->len && btrfs_inode_extref_parent(leaf, extref) == ref_objectid && - (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)) + (memcmp_extent_buffer(leaf, name->name, name_ptr, + name->len) == 0)) return extref; cur_offset += ref_name_len + sizeof(*extref); @@ -75,7 +77,7 @@ struct btrfs_inode_extref * btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - const char *name, int name_len, + const struct fscrypt_str *name, u64 inode_objectid, u64 ref_objectid, int ins_len, int cow) { @@ -84,7 +86,7 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, key.objectid = inode_objectid; key.type = BTRFS_INODE_EXTREF_KEY; - key.offset = btrfs_extref_hash(ref_objectid, name, name_len); + key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len); ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); if (ret < 0) @@ -92,13 +94,13 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, if (ret > 0) return NULL; return btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0], - ref_objectid, name, name_len); + ref_objectid, name); } static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - const char *name, int name_len, + const struct fscrypt_str *name, u64 inode_objectid, u64 ref_objectid, u64 *index) { @@ -107,14 +109,14 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, struct btrfs_inode_extref *extref; struct extent_buffer *leaf; int ret; - int del_len = name_len + sizeof(*extref); + int del_len = name->len + sizeof(*extref); unsigned long ptr; unsigned long item_start; u32 item_size; key.objectid = inode_objectid; key.type = BTRFS_INODE_EXTREF_KEY; - key.offset = btrfs_extref_hash(ref_objectid, name, name_len); + key.offset = btrfs_extref_hash(ref_objectid, name->name, 
name->len); path = btrfs_alloc_path(); if (!path) @@ -132,7 +134,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, * readonly. */ extref = btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0], - ref_objectid, name, name_len); + ref_objectid, name); if (!extref) { btrfs_handle_fs_error(root->fs_info, -ENOENT, NULL); ret = -EROFS; @@ -168,8 +170,7 @@ out: } int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - const char *name, int name_len, + struct btrfs_root *root, const struct fscrypt_str *name, u64 inode_objectid, u64 ref_objectid, u64 *index) { struct btrfs_path *path; @@ -182,7 +183,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, u32 sub_item_len; int ret; int search_ext_refs = 0; - int del_len = name_len + sizeof(*ref); + int del_len = name->len + sizeof(*ref); key.objectid = inode_objectid; key.offset = ref_objectid; @@ -201,8 +202,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, goto out; } - ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0], name, - name_len); + ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0], name); if (!ref) { ret = -ENOENT; search_ext_refs = 1; @@ -219,7 +219,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, goto out; } ptr = (unsigned long)ref; - sub_item_len = name_len + sizeof(*ref); + sub_item_len = name->len + sizeof(*ref); item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, item_size - (ptr + sub_item_len - item_start)); @@ -233,7 +233,7 @@ out: * name in our ref array. Find and remove the extended * inode ref then. */ - return btrfs_del_inode_extref(trans, root, name, name_len, + return btrfs_del_inode_extref(trans, root, name, inode_objectid, ref_objectid, index); } @@ -247,12 +247,13 @@ out: */ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - const char *name, int name_len, - u64 inode_objectid, u64 ref_objectid, u64 index) + const struct fscrypt_str *name, + u64 inode_objectid, u64 ref_objectid, + u64 index) { struct btrfs_inode_extref *extref; int ret; - int ins_len = name_len + sizeof(*extref); + int ins_len = name->len + sizeof(*extref); unsigned long ptr; struct btrfs_path *path; struct btrfs_key key; @@ -260,7 +261,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, key.objectid = inode_objectid; key.type = BTRFS_INODE_EXTREF_KEY; - key.offset = btrfs_extref_hash(ref_objectid, name, name_len); + key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len); path = btrfs_alloc_path(); if (!path) @@ -272,7 +273,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, if (btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0], ref_objectid, - name, name_len)) + name)) goto out; btrfs_extend_item(path, ins_len); @@ -286,12 +287,12 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, ptr += btrfs_item_size(leaf, path->slots[0]) - ins_len; extref = (struct btrfs_inode_extref *)ptr; - btrfs_set_inode_extref_name_len(path->nodes[0], extref, name_len); + btrfs_set_inode_extref_name_len(path->nodes[0], extref, name->len); btrfs_set_inode_extref_index(path->nodes[0], extref, index); btrfs_set_inode_extref_parent(path->nodes[0], extref, ref_objectid); ptr = (unsigned long)&extref->name; - write_extent_buffer(path->nodes[0], name, ptr, name_len); + write_extent_buffer(path->nodes[0], name->name, ptr, name->len); 
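+ /*
+  * (Editorial aside, not part of the patch: throughout this series a
+  * "const char *name, int name_len" pair is replaced by a single
+  * struct fscrypt_str, which is simply a counted string:
+  *
+  *     struct fscrypt_str { unsigned char *name; u32 len; };
+  *
+  * Callers holding a literal name build one with FSTR_INIT(), e.g.
+  * FSTR_INIT("default", 7); callers starting from a dentry obtain the
+  * on-disk name with fscrypt_setup_filename() and release it with
+  * fscrypt_free_filename(), as the hunks that follow show.)
+  */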
btrfs_mark_buffer_dirty(path->nodes[0]); out: @@ -301,8 +302,7 @@ out: /* Will return 0, -ENOMEM, -EMLINK, or -EEXIST or anything from the CoW path */ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - const char *name, int name_len, + struct btrfs_root *root, const struct fscrypt_str *name, u64 inode_objectid, u64 ref_objectid, u64 index) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -311,7 +311,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, struct btrfs_inode_ref *ref; unsigned long ptr; int ret; - int ins_len = name_len + sizeof(*ref); + int ins_len = name->len + sizeof(*ref); key.objectid = inode_objectid; key.offset = ref_objectid; @@ -327,7 +327,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, if (ret == -EEXIST) { u32 old_size; ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0], - name, name_len); + name); if (ref) goto out; @@ -336,7 +336,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, ref = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_ref); ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size); - btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); + btrfs_set_inode_ref_name_len(path->nodes[0], ref, name->len); btrfs_set_inode_ref_index(path->nodes[0], ref, index); ptr = (unsigned long)(ref + 1); ret = 0; @@ -344,7 +344,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, if (ret == -EOVERFLOW) { if (btrfs_find_name_in_backref(path->nodes[0], path->slots[0], - name, name_len)) + name)) ret = -EEXIST; else ret = -EMLINK; @@ -353,11 +353,11 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, } else { ref = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_ref); - btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); + btrfs_set_inode_ref_name_len(path->nodes[0], ref, name->len); btrfs_set_inode_ref_index(path->nodes[0], ref, index); ptr = (unsigned long)(ref + 1); } - write_extent_buffer(path->nodes[0], name, ptr, name_len); + write_extent_buffer(path->nodes[0], name->name, ptr, name->len); btrfs_mark_buffer_dirty(path->nodes[0]); out: @@ -370,7 +370,6 @@ out: if (btrfs_super_incompat_flags(disk_super) & BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF) ret = btrfs_insert_inode_extref(trans, root, name, - name_len, inode_objectid, ref_objectid, index); } diff --git a/fs/btrfs/inode-item.h b/fs/btrfs/inode-item.h index a8fc16d0147f6..b80aeb7157010 100644 --- a/fs/btrfs/inode-item.h +++ b/fs/btrfs/inode-item.h @@ -64,33 +64,31 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_truncate_control *control); int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - const char *name, int name_len, + struct btrfs_root *root, const struct fscrypt_str *name, u64 inode_objectid, u64 ref_objectid, u64 index); int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - const char *name, int name_len, - u64 inode_objectid, u64 ref_objectid, u64 *index); + struct btrfs_root *root, const struct fscrypt_str *name, + u64 inode_objectid, u64 ref_objectid, u64 *index); int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid); -int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, +int btrfs_lookup_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path 
*path, struct btrfs_key *location, int mod); struct btrfs_inode_extref *btrfs_lookup_inode_extref( struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - const char *name, int name_len, + const struct fscrypt_str *name, u64 inode_objectid, u64 ref_objectid, int ins_len, int cow); struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf, - int slot, const char *name, - int name_len); + int slot, + const struct fscrypt_str *name); struct btrfs_inode_extref *btrfs_find_name_in_ext_backref( struct extent_buffer *leaf, int slot, u64 ref_objectid, - const char *name, int name_len); + const struct fscrypt_str *name); #endif diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 222068bf80031..4063447217f92 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3627,7 +3627,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) spin_unlock(&fs_info->delayed_iput_lock); } -/** +/* * Wait for flushing all delayed iputs * * @fs_info: the filesystem @@ -4272,7 +4272,7 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_inode *dir, struct btrfs_inode *inode, - const char *name, int name_len, + const struct fscrypt_str *name, struct btrfs_rename_ctx *rename_ctx) { struct btrfs_root *root = dir->root; @@ -4290,8 +4290,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, goto out; } - di = btrfs_lookup_dir_item(trans, root, path, dir_ino, - name, name_len, -1); + di = btrfs_lookup_dir_item(trans, root, path, dir_ino, name, -1); if (IS_ERR_OR_NULL(di)) { ret = di ? PTR_ERR(di) : -ENOENT; goto err; @@ -4319,12 +4318,11 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, } } - ret = btrfs_del_inode_ref(trans, root, name, name_len, ino, - dir_ino, &index); + ret = btrfs_del_inode_ref(trans, root, name, ino, dir_ino, &index); if (ret) { btrfs_info(fs_info, "failed to delete reference to %.*s, inode %llu parent %llu", - name_len, name, ino, dir_ino); + name->len, name->name, ino, dir_ino); btrfs_abort_transaction(trans, ret); goto err; } @@ -4345,10 +4343,8 @@ skip_backref: * operations on the log tree, increasing latency for applications. 
*/ if (!rename_ctx) { - btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode, - dir_ino); - btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir, - index); + btrfs_del_inode_ref_in_log(trans, root, name, inode, dir_ino); + btrfs_del_dir_entries_in_log(trans, root, name, dir, index); } /* @@ -4366,7 +4362,7 @@ err: if (ret) goto out; - btrfs_i_size_write(dir, dir->vfs_inode.i_size - name_len * 2); + btrfs_i_size_write(dir, dir->vfs_inode.i_size - name->len * 2); inode_inc_iversion(&inode->vfs_inode); inode_inc_iversion(&dir->vfs_inode); inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode); @@ -4379,10 +4375,11 @@ out: int btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_inode *dir, struct btrfs_inode *inode, - const char *name, int name_len) + const struct fscrypt_str *name) { int ret; - ret = __btrfs_unlink_inode(trans, dir, inode, name, name_len, NULL); + + ret = __btrfs_unlink_inode(trans, dir, inode, name, NULL); if (!ret) { drop_nlink(&inode->vfs_inode); ret = btrfs_update_inode(trans, inode->root, inode); @@ -4418,29 +4415,39 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) struct btrfs_trans_handle *trans; struct inode *inode = d_inode(dentry); int ret; + struct fscrypt_name fname; + + ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname); + if (ret) + return ret; + + /* This needs to handle no-key deletions later on */ trans = __unlink_start_trans(dir); - if (IS_ERR(trans)) - return PTR_ERR(trans); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto fscrypt_free; + } btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), 0); - ret = btrfs_unlink_inode(trans, BTRFS_I(dir), - BTRFS_I(d_inode(dentry)), dentry->d_name.name, - dentry->d_name.len); + ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), + &fname.disk_name); if (ret) - goto out; + goto end_trans; if (inode->i_nlink == 0) { ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) - goto out; + goto end_trans; } -out: +end_trans: btrfs_end_transaction(trans); btrfs_btree_balance_dirty(BTRFS_I(dir)->root->fs_info); +fscrypt_free: + fscrypt_free_filename(&fname); return ret; } @@ -4453,12 +4460,17 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_dir_item *di; struct btrfs_key key; - const char *name = dentry->d_name.name; - int name_len = dentry->d_name.len; u64 index; int ret; u64 objectid; u64 dir_ino = btrfs_ino(BTRFS_I(dir)); + struct fscrypt_name fname; + + ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname); + if (ret) + return ret; + + /* This needs to handle no-key deletions later on */ if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) { objectid = inode->root->root_key.objectid; @@ -4466,15 +4478,18 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, objectid = inode->location.objectid; } else { WARN_ON(1); + fscrypt_free_filename(&fname); return -EINVAL; } path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + ret = -ENOMEM; + goto out; + } di = btrfs_lookup_dir_item(trans, root, path, dir_ino, - name, name_len, -1); + &fname.disk_name, -1); if (IS_ERR_OR_NULL(di)) { ret = di ? PTR_ERR(di) : -ENOENT; goto out; @@ -4500,8 +4515,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, * call btrfs_del_root_ref, and it _shouldn't_ fail. 
*/ if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) { - di = btrfs_search_dir_index_item(root, path, dir_ino, - name, name_len); + di = btrfs_search_dir_index_item(root, path, dir_ino, &fname.disk_name); if (IS_ERR_OR_NULL(di)) { if (!di) ret = -ENOENT; @@ -4518,7 +4532,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, } else { ret = btrfs_del_root_ref(trans, objectid, root->root_key.objectid, dir_ino, - &index, name, name_len); + &index, &fname.disk_name); if (ret) { btrfs_abort_transaction(trans, ret); goto out; @@ -4531,7 +4545,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, goto out; } - btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2); + btrfs_i_size_write(BTRFS_I(dir), dir->i_size - fname.disk_name.len * 2); inode_inc_iversion(dir); dir->i_mtime = current_time(dir); dir->i_ctime = dir->i_mtime; @@ -4540,6 +4554,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, btrfs_abort_transaction(trans, ret); out: btrfs_free_path(path); + fscrypt_free_filename(&fname); return ret; } @@ -4553,6 +4568,7 @@ static noinline int may_destroy_subvol(struct btrfs_root *root) struct btrfs_path *path; struct btrfs_dir_item *di; struct btrfs_key key; + struct fscrypt_str name = FSTR_INIT("default", 7); u64 dir_id; int ret; @@ -4563,7 +4579,7 @@ static noinline int may_destroy_subvol(struct btrfs_root *root) /* Make sure this root isn't set as the default subvol */ dir_id = btrfs_super_root_dir(fs_info->super_copy); di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path, - dir_id, "default", 7, 0); + dir_id, &name, 0); if (di && !IS_ERR(di)) { btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); if (key.objectid == root->root_key.objectid) { @@ -4802,6 +4818,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) int err = 0; struct btrfs_trans_handle *trans; u64 last_unlink_trans; + struct fscrypt_name fname; if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) return -ENOTEMPTY; @@ -4814,9 +4831,17 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) return btrfs_delete_subvolume(dir, dentry); } + err = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname); + if (err) + return err; + + /* This needs to handle no-key deletions later on */ + trans = __unlink_start_trans(dir); - if (IS_ERR(trans)) - return PTR_ERR(trans); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto out_notrans; + } if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { err = btrfs_unlink_subvol(trans, dir, dentry); @@ -4830,9 +4855,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) last_unlink_trans = BTRFS_I(inode)->last_unlink_trans; /* now the directory is empty */ - err = btrfs_unlink_inode(trans, BTRFS_I(dir), - BTRFS_I(d_inode(dentry)), dentry->d_name.name, - dentry->d_name.len); + err = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), + &fname.disk_name); if (!err) { btrfs_i_size_write(BTRFS_I(inode), 0); /* @@ -4851,7 +4875,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) } out: btrfs_end_transaction(trans); +out_notrans: btrfs_btree_balance_dirty(fs_info); + fscrypt_free_filename(&fname); return err; } @@ -5532,19 +5558,24 @@ no_delete: static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, struct btrfs_key *location, u8 *type) { - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; struct btrfs_dir_item *di; struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(dir)->root; int ret = 0; + struct 
fscrypt_name fname; path = btrfs_alloc_path(); if (!path) return -ENOMEM; + ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname); + if (ret) + goto out; + + /* This needs to handle no-key deletions later on */ + di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)), - name, namelen, 0); + &fname.disk_name, 0); if (IS_ERR_OR_NULL(di)) { ret = di ? PTR_ERR(di) : -ENOENT; goto out; @@ -5556,12 +5587,13 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, ret = -EUCLEAN; btrfs_warn(root->fs_info, "%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))", - __func__, name, btrfs_ino(BTRFS_I(dir)), + __func__, fname.disk_name.name, btrfs_ino(BTRFS_I(dir)), location->objectid, location->type, location->offset); } if (!ret) *type = btrfs_dir_type(path->nodes[0], di); out: + fscrypt_free_filename(&fname); btrfs_free_path(path); return ret; } @@ -5584,6 +5616,11 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info, struct btrfs_key key; int ret; int err = 0; + struct fscrypt_name fname; + + ret = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname); + if (ret) + return ret; path = btrfs_alloc_path(); if (!path) { @@ -5606,12 +5643,11 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info, leaf = path->nodes[0]; ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(BTRFS_I(dir)) || - btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) + btrfs_root_ref_name_len(leaf, ref) != fname.disk_name.len) goto out; - ret = memcmp_extent_buffer(leaf, dentry->d_name.name, - (unsigned long)(ref + 1), - dentry->d_name.len); + ret = memcmp_extent_buffer(leaf, fname.disk_name.name, + (unsigned long)(ref + 1), fname.disk_name.len); if (ret) goto out; @@ -5630,6 +5666,7 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info, err = 0; out: btrfs_free_path(path); + fscrypt_free_filename(&fname); return err; } @@ -6238,9 +6275,18 @@ int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args, struct inode *inode = args->inode; int ret; + if (!args->orphan) { + ret = fscrypt_setup_filename(dir, &args->dentry->d_name, 0, + &args->fname); + if (ret) + return ret; + } + ret = posix_acl_create(dir, &inode->i_mode, &args->default_acl, &args->acl); - if (ret) + if (ret) { + fscrypt_free_filename(&args->fname); return ret; + } /* 1 to add inode item */ *trans_num_items = 1; @@ -6280,6 +6326,7 @@ void btrfs_new_inode_args_destroy(struct btrfs_new_inode_args *args) { posix_acl_release(args->acl); posix_acl_release(args->default_acl); + fscrypt_free_filename(&args->fname); } /* @@ -6315,8 +6362,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, { struct inode *dir = args->dir; struct inode *inode = args->inode; - const char *name = args->orphan ? NULL : args->dentry->d_name.name; - int name_len = args->orphan ? 0 : args->dentry->d_name.len; + const struct fscrypt_str *name = args->orphan ? 
NULL : &args->fname.disk_name; struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); struct btrfs_root *root; struct btrfs_inode_item *inode_item; @@ -6417,7 +6463,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, sizes[1] = 2 + sizeof(*ref); } else { key[1].offset = btrfs_ino(BTRFS_I(dir)); - sizes[1] = name_len + sizeof(*ref); + sizes[1] = name->len + sizeof(*ref); } } @@ -6456,10 +6502,12 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, btrfs_set_inode_ref_index(path->nodes[0], ref, 0); write_extent_buffer(path->nodes[0], "..", ptr, 2); } else { - btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); + btrfs_set_inode_ref_name_len(path->nodes[0], ref, + name->len); btrfs_set_inode_ref_index(path->nodes[0], ref, BTRFS_I(inode)->dir_index); - write_extent_buffer(path->nodes[0], name, ptr, name_len); + write_extent_buffer(path->nodes[0], name->name, ptr, + name->len); } } @@ -6520,7 +6568,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, ret = btrfs_orphan_add(trans, BTRFS_I(inode)); } else { ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name, - name_len, 0, BTRFS_I(inode)->dir_index); + 0, BTRFS_I(inode)->dir_index); } if (ret) { btrfs_abort_transaction(trans, ret); @@ -6549,7 +6597,7 @@ out: */ int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_inode *parent_inode, struct btrfs_inode *inode, - const char *name, int name_len, int add_backref, u64 index) + const struct fscrypt_str *name, int add_backref, u64 index) { int ret = 0; struct btrfs_key key; @@ -6568,17 +6616,17 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) { ret = btrfs_add_root_ref(trans, key.objectid, root->root_key.objectid, parent_ino, - index, name, name_len); + index, name); } else if (add_backref) { - ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino, - parent_ino, index); + ret = btrfs_insert_inode_ref(trans, root, name, + ino, parent_ino, index); } /* Nothing to clean up yet */ if (ret) return ret; - ret = btrfs_insert_dir_item(trans, name, name_len, parent_inode, &key, + ret = btrfs_insert_dir_item(trans, name, parent_inode, &key, btrfs_inode_type(&inode->vfs_inode), index); if (ret == -EEXIST || ret == -EOVERFLOW) goto fail_dir_item; @@ -6588,7 +6636,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, } btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size + - name_len * 2); + name->len * 2); inode_inc_iversion(&parent_inode->vfs_inode); /* * If we are replaying a log tree, we do not want to update the mtime @@ -6613,15 +6661,15 @@ fail_dir_item: int err; err = btrfs_del_root_ref(trans, key.objectid, root->root_key.objectid, parent_ino, - &local_index, name, name_len); + &local_index, name); if (err) btrfs_abort_transaction(trans, err); } else if (add_backref) { u64 local_index; int err; - err = btrfs_del_inode_ref(trans, root, name, name_len, - ino, parent_ino, &local_index); + err = btrfs_del_inode_ref(trans, root, name, ino, parent_ino, + &local_index); if (err) btrfs_abort_transaction(trans, err); } @@ -6704,6 +6752,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, struct btrfs_root *root = BTRFS_I(dir)->root; struct inode *inode = d_inode(old_dentry); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct fscrypt_name fname; u64 index; int err; int drop_inode = 0; @@ -6715,6 +6764,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink >= BTRFS_LINK_MAX) return -EMLINK; + err = 
fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname); + if (err) + goto fail; + err = btrfs_set_inode_index(BTRFS_I(dir), &index); if (err) goto fail; @@ -6741,7 +6794,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), - dentry->d_name.name, dentry->d_name.len, 1, index); + &fname.disk_name, 1, index); if (err) { drop_inode = 1; @@ -6765,6 +6818,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, } fail: + fscrypt_free_filename(&fname); if (trans) btrfs_end_transaction(trans); if (drop_inode) { @@ -9037,6 +9091,8 @@ static int btrfs_rename_exchange(struct inode *old_dir, int ret; int ret2; bool need_abort = false; + struct fscrypt_name old_fname, new_fname; + struct fscrypt_str *old_name, *new_name; /* * For non-subvolumes allow exchange only within one subvolume, in the @@ -9048,6 +9104,19 @@ static int btrfs_rename_exchange(struct inode *old_dir, new_ino != BTRFS_FIRST_FREE_OBJECTID)) return -EXDEV; + ret = fscrypt_setup_filename(old_dir, &old_dentry->d_name, 0, &old_fname); + if (ret) + return ret; + + ret = fscrypt_setup_filename(new_dir, &new_dentry->d_name, 0, &new_fname); + if (ret) { + fscrypt_free_filename(&old_fname); + return ret; + } + + old_name = &old_fname.disk_name; + new_name = &new_fname.disk_name; + /* close the race window with snapshot create/destroy ioctl */ if (old_ino == BTRFS_FIRST_FREE_OBJECTID || new_ino == BTRFS_FIRST_FREE_OBJECTID) @@ -9115,10 +9184,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, /* force full log commit if subvolume involved. */ btrfs_set_log_full_commit(trans); } else { - ret = btrfs_insert_inode_ref(trans, dest, - new_dentry->d_name.name, - new_dentry->d_name.len, - old_ino, + ret = btrfs_insert_inode_ref(trans, dest, new_name, old_ino, btrfs_ino(BTRFS_I(new_dir)), old_idx); if (ret) @@ -9131,10 +9197,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, /* force full log commit if subvolume involved. 
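 *
 * (Editorial sketch, illustration only: the two-name setup added to
 * the rename paths pairs each fscrypt_setup_filename() with a
 * fscrypt_free_filename() and unwinds the first name when the second
 * setup fails:
 *
 *     ret = fscrypt_setup_filename(old_dir, &old_dentry->d_name, 0, &old_fname);
 *     if (ret)
 *         return ret;
 *     ret = fscrypt_setup_filename(new_dir, &new_dentry->d_name, 0, &new_fname);
 *     if (ret) {
 *         fscrypt_free_filename(&old_fname);    // undo the first setup
 *         return ret;
 *     }
 *     ...
 *     fscrypt_free_filename(&new_fname);        // common exit frees both
 *     fscrypt_free_filename(&old_fname);
 * )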
*/ btrfs_set_log_full_commit(trans); } else { - ret = btrfs_insert_inode_ref(trans, root, - old_dentry->d_name.name, - old_dentry->d_name.len, - new_ino, + ret = btrfs_insert_inode_ref(trans, root, old_name, new_ino, btrfs_ino(BTRFS_I(old_dir)), new_idx); if (ret) { @@ -9169,9 +9232,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, } else { /* src is an inode */ ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir), BTRFS_I(old_dentry->d_inode), - old_dentry->d_name.name, - old_dentry->d_name.len, - &old_rename_ctx); + old_name, &old_rename_ctx); if (!ret) ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode)); } @@ -9186,9 +9247,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, } else { /* dest is an inode */ ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir), BTRFS_I(new_dentry->d_inode), - new_dentry->d_name.name, - new_dentry->d_name.len, - &new_rename_ctx); + new_name, &new_rename_ctx); if (!ret) ret = btrfs_update_inode(trans, dest, BTRFS_I(new_inode)); } @@ -9198,16 +9257,14 @@ static int btrfs_rename_exchange(struct inode *old_dir, } ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode), - new_dentry->d_name.name, - new_dentry->d_name.len, 0, old_idx); + new_name, 0, old_idx); if (ret) { btrfs_abort_transaction(trans, ret); goto out_fail; } ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode), - old_dentry->d_name.name, - old_dentry->d_name.len, 0, new_idx); + old_name, 0, new_idx); if (ret) { btrfs_abort_transaction(trans, ret); goto out_fail; @@ -9250,6 +9307,8 @@ out_notrans: old_ino == BTRFS_FIRST_FREE_OBJECTID) up_read(&fs_info->subvol_sem); + fscrypt_free_filename(&new_fname); + fscrypt_free_filename(&old_fname); return ret; } @@ -9289,6 +9348,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, int ret; int ret2; u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); + struct fscrypt_name old_fname, new_fname; if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) return -EPERM; @@ -9305,22 +9365,28 @@ static int btrfs_rename(struct user_namespace *mnt_userns, new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) return -ENOTEMPTY; + ret = fscrypt_setup_filename(old_dir, &old_dentry->d_name, 0, &old_fname); + if (ret) + return ret; - /* check for collisions, even if the name isn't there */ - ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino, - new_dentry->d_name.name, - new_dentry->d_name.len); + ret = fscrypt_setup_filename(new_dir, &new_dentry->d_name, 0, &new_fname); + if (ret) { + fscrypt_free_filename(&old_fname); + return ret; + } + /* check for collisions, even if the name isn't there */ + ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino, &new_fname.disk_name); if (ret) { if (ret == -EEXIST) { /* we shouldn't get * eexist without a new_inode */ if (WARN_ON(!new_inode)) { - return ret; + goto out_fscrypt_names; } } else { /* maybe -EOVERFLOW */ - return ret; + goto out_fscrypt_names; } } ret = 0; @@ -9334,8 +9400,10 @@ static int btrfs_rename(struct user_namespace *mnt_userns, if (flags & RENAME_WHITEOUT) { whiteout_args.inode = new_whiteout_inode(mnt_userns, old_dir); - if (!whiteout_args.inode) - return -ENOMEM; + if (!whiteout_args.inode) { + ret = -ENOMEM; + goto out_fscrypt_names; + } ret = btrfs_new_inode_prepare(&whiteout_args, &trans_num_items); if (ret) goto out_whiteout_inode; @@ -9403,11 +9471,9 @@ static int btrfs_rename(struct user_namespace *mnt_userns, /* force full log commit if subvolume involved. 
*/ btrfs_set_log_full_commit(trans); } else { - ret = btrfs_insert_inode_ref(trans, dest, - new_dentry->d_name.name, - new_dentry->d_name.len, - old_ino, - btrfs_ino(BTRFS_I(new_dir)), index); + ret = btrfs_insert_inode_ref(trans, dest, &new_fname.disk_name, + old_ino, btrfs_ino(BTRFS_I(new_dir)), + index); if (ret) goto out_fail; } @@ -9429,10 +9495,8 @@ static int btrfs_rename(struct user_namespace *mnt_userns, ret = btrfs_unlink_subvol(trans, old_dir, old_dentry); } else { ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir), - BTRFS_I(d_inode(old_dentry)), - old_dentry->d_name.name, - old_dentry->d_name.len, - &rename_ctx); + BTRFS_I(d_inode(old_dentry)), + &old_fname.disk_name, &rename_ctx); if (!ret) ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode)); } @@ -9451,8 +9515,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, } else { ret = btrfs_unlink_inode(trans, BTRFS_I(new_dir), BTRFS_I(d_inode(new_dentry)), - new_dentry->d_name.name, - new_dentry->d_name.len); + &new_fname.disk_name); } if (!ret && new_inode->i_nlink == 0) ret = btrfs_orphan_add(trans, @@ -9464,8 +9527,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, } ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode), - new_dentry->d_name.name, - new_dentry->d_name.len, 0, index); + &new_fname.disk_name, 0, index); if (ret) { btrfs_abort_transaction(trans, ret); goto out_fail; @@ -9500,6 +9562,9 @@ out_notrans: out_whiteout_inode: if (flags & RENAME_WHITEOUT) iput(whiteout_args.inode); +out_fscrypt_names: + fscrypt_free_filename(&old_fname); + fscrypt_free_filename(&new_fname); return ret; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2e29fafe0e7d9..9e323420c96d3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -951,6 +951,7 @@ static noinline int btrfs_mksubvol(const struct path *parent, struct inode *dir = d_inode(parent->dentry); struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); struct dentry *dentry; + struct fscrypt_str name_str = FSTR_INIT((char *)name, namelen); int error; error = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); @@ -971,8 +972,7 @@ static noinline int btrfs_mksubvol(const struct path *parent, * check for them now when we can safely fail */ error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root, - dir->i_ino, name, - namelen); + dir->i_ino, &name_str); if (error) goto out_dput; @@ -3782,6 +3782,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) struct btrfs_trans_handle *trans; struct btrfs_path *path = NULL; struct btrfs_disk_key disk_key; + struct fscrypt_str name = FSTR_INIT("default", 7); u64 objectid = 0; u64 dir_id; int ret; @@ -3825,7 +3826,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) dir_id = btrfs_super_root_dir(fs_info->super_copy); di = btrfs_lookup_dir_item(trans, fs_info->tree_root, path, - dir_id, "default", 7, 1); + dir_id, &name, 1); if (IS_ERR_OR_NULL(di)) { btrfs_release_path(path); btrfs_end_transaction(trans); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index e1f599d7a9164..7d783f0943068 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -327,9 +327,8 @@ out: } int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, - u64 ref_id, u64 dirid, u64 *sequence, const char *name, - int name_len) - + u64 ref_id, u64 dirid, u64 *sequence, + const struct fscrypt_str *name) { struct btrfs_root *tree_root = trans->fs_info->tree_root; struct btrfs_path *path; @@ -356,8 +355,8 @@ again: struct btrfs_root_ref); ptr = 
(unsigned long)(ref + 1); if ((btrfs_root_ref_dirid(leaf, ref) != dirid) || - (btrfs_root_ref_name_len(leaf, ref) != name_len) || - memcmp_extent_buffer(leaf, name, ptr, name_len)) { + (btrfs_root_ref_name_len(leaf, ref) != name->len) || + memcmp_extent_buffer(leaf, name->name, ptr, name->len)) { ret = -ENOENT; goto out; } @@ -400,8 +399,8 @@ out: * Will return 0, -ENOMEM, or anything from the CoW path */ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, - u64 ref_id, u64 dirid, u64 sequence, const char *name, - int name_len) + u64 ref_id, u64 dirid, u64 sequence, + const struct fscrypt_str *name) { struct btrfs_root *tree_root = trans->fs_info->tree_root; struct btrfs_key key; @@ -420,7 +419,7 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, key.offset = ref_id; again: ret = btrfs_insert_empty_item(trans, tree_root, path, &key, - sizeof(*ref) + name_len); + sizeof(*ref) + name->len); if (ret) { btrfs_abort_transaction(trans, ret); btrfs_free_path(path); @@ -431,9 +430,9 @@ again: ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); btrfs_set_root_ref_dirid(leaf, ref, dirid); btrfs_set_root_ref_sequence(leaf, ref, sequence); - btrfs_set_root_ref_name_len(leaf, ref, name_len); + btrfs_set_root_ref_name_len(leaf, ref, name->len); ptr = (unsigned long)(ref + 1); - write_extent_buffer(leaf, name, ptr, name_len); + write_extent_buffer(leaf, name->name, ptr, name->len); btrfs_mark_buffer_dirty(leaf); if (key.type == BTRFS_ROOT_BACKREF_KEY) { diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 35e889fe2a95d..547b5c2292186 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1596,13 +1596,17 @@ static int gen_unique_name(struct send_ctx *sctx, return -ENOMEM; while (1) { + struct fscrypt_str tmp_name; + len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu", ino, gen, idx); ASSERT(len < sizeof(tmp)); + tmp_name.name = tmp; + tmp_name.len = strlen(tmp); di = btrfs_lookup_dir_item(NULL, sctx->send_root, path, BTRFS_FIRST_FREE_OBJECTID, - tmp, strlen(tmp), 0); + &tmp_name, 0); btrfs_release_path(path); if (IS_ERR(di)) { ret = PTR_ERR(di); @@ -1622,7 +1626,7 @@ static int gen_unique_name(struct send_ctx *sctx, di = btrfs_lookup_dir_item(NULL, sctx->parent_root, path, BTRFS_FIRST_FREE_OBJECTID, - tmp, strlen(tmp), 0); + &tmp_name, 0); btrfs_release_path(path); if (IS_ERR(di)) { ret = PTR_ERR(di); @@ -1752,13 +1756,13 @@ static int lookup_dir_item_inode(struct btrfs_root *root, struct btrfs_dir_item *di; struct btrfs_key key; struct btrfs_path *path; + struct fscrypt_str name_str = FSTR_INIT((char *)name, name_len); path = alloc_path_for_send(); if (!path) return -ENOMEM; - di = btrfs_lookup_dir_item(NULL, root, path, - dir, name, name_len, 0); + di = btrfs_lookup_dir_item(NULL, root, path, dir, &name_str, 0); if (IS_ERR_OR_NULL(di)) { ret = di ? PTR_ERR(di) : -ENOENT; goto out; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 582b71b7fa779..2c562febd801e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1398,6 +1398,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec struct btrfs_dir_item *di; struct btrfs_path *path; struct btrfs_key location; + struct fscrypt_str name = FSTR_INIT("default", 7); u64 dir_id; path = btrfs_alloc_path(); @@ -1410,7 +1411,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec * to mount. 
*/ dir_id = btrfs_super_root_dir(fs_info->super_copy); - di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); + di = btrfs_lookup_dir_item(NULL, root, path, dir_id, &name, 0); if (IS_ERR(di)) { btrfs_free_path(path); return PTR_ERR(di); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a555567594418..1193214ba8c10 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -1627,10 +1628,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root = pending->root; struct btrfs_root *parent_root; struct btrfs_block_rsv *rsv; - struct inode *parent_inode; + struct inode *parent_inode = pending->dir; struct btrfs_path *path; struct btrfs_dir_item *dir_item; - struct dentry *dentry; struct extent_buffer *tmp; struct extent_buffer *old; struct timespec64 cur_time; @@ -1639,6 +1639,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, u64 index = 0; u64 objectid; u64 root_flags; + unsigned int nofs_flags; + struct fscrypt_name fname; ASSERT(pending->path); path = pending->path; @@ -1646,9 +1648,22 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ASSERT(pending->root_item); new_root_item = pending->root_item; + /* + * We're inside a transaction and must make sure that any potential + * allocations with GFP_KERNEL in fscrypt won't recurse back to + * filesystem. + */ + nofs_flags = memalloc_nofs_save(); + pending->error = fscrypt_setup_filename(parent_inode, + &pending->dentry->d_name, 0, + &fname); + memalloc_nofs_restore(nofs_flags); + if (pending->error) + goto free_pending; + pending->error = btrfs_get_free_objectid(tree_root, &objectid); if (pending->error) - goto no_free_objectid; + goto free_fname; /* * Make qgroup to skip current new snapshot's qgroupid, as it is @@ -1677,8 +1692,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, trace_btrfs_space_reservation(fs_info, "transaction", trans->transid, trans->bytes_reserved, 1); - dentry = pending->dentry; - parent_inode = pending->dir; parent_root = BTRFS_I(parent_inode)->root; ret = record_root_in_trans(trans, parent_root, 0); if (ret) @@ -1694,8 +1707,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, /* check if there is a file/dir which has the same name. */ dir_item = btrfs_lookup_dir_item(NULL, parent_root, path, btrfs_ino(BTRFS_I(parent_inode)), - dentry->d_name.name, - dentry->d_name.len, 0); + &fname.disk_name, 0); if (dir_item != NULL && !IS_ERR(dir_item)) { pending->error = -EEXIST; goto dir_item_existed; @@ -1790,7 +1802,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ret = btrfs_add_root_ref(trans, objectid, parent_root->root_key.objectid, btrfs_ino(BTRFS_I(parent_inode)), index, - dentry->d_name.name, dentry->d_name.len); + &fname.disk_name); if (ret) { btrfs_abort_transaction(trans, ret); goto fail; @@ -1822,9 +1834,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, if (ret < 0) goto fail; - ret = btrfs_insert_dir_item(trans, dentry->d_name.name, - dentry->d_name.len, BTRFS_I(parent_inode), - &key, BTRFS_FT_DIR, index); + ret = btrfs_insert_dir_item(trans, &fname.disk_name, + BTRFS_I(parent_inode), &key, BTRFS_FT_DIR, + index); /* We have check then name at the beginning, so it is impossible. 
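 * (That is, the name was already looked up for collisions near the top
 * of this function, where an existing entry bails out with -EEXIST, so
 * the -EEXIST / -EOVERFLOW cases asserted below are unreachable.)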
*/ BUG_ON(ret == -EEXIST || ret == -EOVERFLOW); if (ret) { @@ -1833,7 +1845,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, } btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size + - dentry->d_name.len * 2); + fname.disk_name.len * 2); parent_inode->i_mtime = current_time(parent_inode); parent_inode->i_ctime = parent_inode->i_mtime; ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode)); @@ -1865,7 +1877,9 @@ dir_item_existed: trans->bytes_reserved = 0; clear_skip_qgroup: btrfs_clear_skip_qgroup(trans); -no_free_objectid: +free_fname: + fscrypt_free_filename(&fname); +free_pending: kfree(new_root_item); pending->root_item = NULL; btrfs_free_path(path); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 00be69ce7b90f..c03ff6a5a7f6b 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -595,6 +595,21 @@ static int overwrite_item(struct btrfs_trans_handle *trans, return do_overwrite_item(trans, root, path, eb, slot, key); } +static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len, + struct fscrypt_str *name) +{ + char *buf; + + buf = kmalloc(len, GFP_NOFS); + if (!buf) + return -ENOMEM; + + read_extent_buffer(eb, buf, (unsigned long)start, len); + name->name = buf; + name->len = len; + return 0; +} + /* * simple helper to read an inode off the disk from a given root * This can only be called for subvolume roots and not for the log @@ -901,12 +916,11 @@ out: static int unlink_inode_for_log_replay(struct btrfs_trans_handle *trans, struct btrfs_inode *dir, struct btrfs_inode *inode, - const char *name, - int name_len) + const struct fscrypt_str *name) { int ret; - ret = btrfs_unlink_inode(trans, dir, inode, name, name_len); + ret = btrfs_unlink_inode(trans, dir, inode, name); if (ret) return ret; /* @@ -933,8 +947,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, { struct btrfs_root *root = dir->root; struct inode *inode; - char *name; - int name_len; + struct fscrypt_str name; struct extent_buffer *leaf; struct btrfs_key location; int ret; @@ -942,12 +955,10 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; btrfs_dir_item_key_to_cpu(leaf, di, &location); - name_len = btrfs_dir_name_len(leaf, di); - name = kmalloc(name_len, GFP_NOFS); - if (!name) + ret = read_alloc_one_name(leaf, di + 1, btrfs_dir_name_len(leaf, di), &name); + if (ret) return -ENOMEM; - read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); btrfs_release_path(path); inode = read_one_inode(root, location.objectid); @@ -960,10 +971,9 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, if (ret) goto out; - ret = unlink_inode_for_log_replay(trans, dir, BTRFS_I(inode), name, - name_len); + ret = unlink_inode_for_log_replay(trans, dir, BTRFS_I(inode), &name); out: - kfree(name); + kfree(name.name); iput(inode); return ret; } @@ -978,14 +988,14 @@ out: static noinline int inode_in_dir(struct btrfs_root *root, struct btrfs_path *path, u64 dirid, u64 objectid, u64 index, - const char *name, int name_len) + struct fscrypt_str *name) { struct btrfs_dir_item *di; struct btrfs_key location; int ret = 0; di = btrfs_lookup_dir_index_item(NULL, root, path, dirid, - index, name, name_len, 0); + index, name, 0); if (IS_ERR(di)) { ret = PTR_ERR(di); goto out; @@ -998,7 +1008,7 @@ static noinline int inode_in_dir(struct btrfs_root *root, } btrfs_release_path(path); - di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 
0); + di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, 0); if (IS_ERR(di)) { ret = PTR_ERR(di); goto out; @@ -1025,7 +1035,7 @@ out: static noinline int backref_in_log(struct btrfs_root *log, struct btrfs_key *key, u64 ref_objectid, - const char *name, int namelen) + const struct fscrypt_str *name) { struct btrfs_path *path; int ret; @@ -1045,12 +1055,10 @@ static noinline int backref_in_log(struct btrfs_root *log, if (key->type == BTRFS_INODE_EXTREF_KEY) ret = !!btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0], - ref_objectid, - name, namelen); + ref_objectid, name); else ret = !!btrfs_find_name_in_backref(path->nodes[0], - path->slots[0], - name, namelen); + path->slots[0], name); out: btrfs_free_path(path); return ret; @@ -1063,11 +1071,9 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, struct btrfs_inode *dir, struct btrfs_inode *inode, u64 inode_objectid, u64 parent_objectid, - u64 ref_index, char *name, int namelen) + u64 ref_index, struct fscrypt_str *name) { int ret; - char *victim_name; - int victim_name_len; struct extent_buffer *leaf; struct btrfs_dir_item *di; struct btrfs_key search_key; @@ -1099,43 +1105,40 @@ again: ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); ptr_end = ptr + btrfs_item_size(leaf, path->slots[0]); while (ptr < ptr_end) { - victim_ref = (struct btrfs_inode_ref *)ptr; - victim_name_len = btrfs_inode_ref_name_len(leaf, - victim_ref); - victim_name = kmalloc(victim_name_len, GFP_NOFS); - if (!victim_name) - return -ENOMEM; + struct fscrypt_str victim_name; - read_extent_buffer(leaf, victim_name, - (unsigned long)(victim_ref + 1), - victim_name_len); + victim_ref = (struct btrfs_inode_ref *)ptr; + ret = read_alloc_one_name(leaf, (victim_ref + 1), + btrfs_inode_ref_name_len(leaf, victim_ref), + &victim_name); + if (ret) + return ret; ret = backref_in_log(log_root, &search_key, - parent_objectid, victim_name, - victim_name_len); + parent_objectid, &victim_name); if (ret < 0) { - kfree(victim_name); + kfree(victim_name.name); return ret; } else if (!ret) { inc_nlink(&inode->vfs_inode); btrfs_release_path(path); ret = unlink_inode_for_log_replay(trans, dir, inode, - victim_name, victim_name_len); - kfree(victim_name); + &victim_name); + kfree(victim_name.name); if (ret) return ret; goto again; } - kfree(victim_name); + kfree(victim_name.name); - ptr = (unsigned long)(victim_ref + 1) + victim_name_len; + ptr = (unsigned long)(victim_ref + 1) + victim_name.len; } } btrfs_release_path(path); /* Same search but for extended refs */ - extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen, + extref = btrfs_lookup_inode_extref(NULL, root, path, name, inode_objectid, parent_objectid, 0, 0); if (IS_ERR(extref)) { @@ -1152,29 +1155,28 @@ again: base = btrfs_item_ptr_offset(leaf, path->slots[0]); while (cur_offset < item_size) { - extref = (struct btrfs_inode_extref *)(base + cur_offset); + struct fscrypt_str victim_name; - victim_name_len = btrfs_inode_extref_name_len(leaf, extref); + extref = (struct btrfs_inode_extref *)(base + cur_offset); if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) goto next; - victim_name = kmalloc(victim_name_len, GFP_NOFS); - if (!victim_name) - return -ENOMEM; - read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name, - victim_name_len); + ret = read_alloc_one_name(leaf, &extref->name, + btrfs_inode_extref_name_len(leaf, extref), + &victim_name); + if (ret) + return ret; search_key.objectid = inode_objectid; search_key.type = BTRFS_INODE_EXTREF_KEY; 
search_key.offset = btrfs_extref_hash(parent_objectid, - victim_name, - victim_name_len); + victim_name.name, + victim_name.len); ret = backref_in_log(log_root, &search_key, - parent_objectid, victim_name, - victim_name_len); + parent_objectid, &victim_name); if (ret < 0) { - kfree(victim_name); + kfree(victim_name.name); return ret; } else if (!ret) { ret = -ENOENT; @@ -1186,26 +1188,24 @@ again: ret = unlink_inode_for_log_replay(trans, BTRFS_I(victim_parent), - inode, - victim_name, - victim_name_len); + inode, &victim_name); } iput(victim_parent); - kfree(victim_name); + kfree(victim_name.name); if (ret) return ret; goto again; } - kfree(victim_name); + kfree(victim_name.name); next: - cur_offset += victim_name_len + sizeof(*extref); + cur_offset += victim_name.len + sizeof(*extref); } } btrfs_release_path(path); /* look for a conflicting sequence number */ di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), - ref_index, name, namelen, 0); + ref_index, name, 0); if (IS_ERR(di)) { return PTR_ERR(di); } else if (di) { @@ -1216,8 +1216,7 @@ next: btrfs_release_path(path); /* look for a conflicting name */ - di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), - name, namelen, 0); + di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), name, 0); if (IS_ERR(di)) { return PTR_ERR(di); } else if (di) { @@ -1231,20 +1230,18 @@ next: } static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, - u32 *namelen, char **name, u64 *index, + struct fscrypt_str *name, u64 *index, u64 *parent_objectid) { struct btrfs_inode_extref *extref; + int ret; extref = (struct btrfs_inode_extref *)ref_ptr; - *namelen = btrfs_inode_extref_name_len(eb, extref); - *name = kmalloc(*namelen, GFP_NOFS); - if (*name == NULL) - return -ENOMEM; - - read_extent_buffer(eb, *name, (unsigned long)&extref->name, - *namelen); + ret = read_alloc_one_name(eb, &extref->name, + btrfs_inode_extref_name_len(eb, extref), name); + if (ret) + return ret; if (index) *index = btrfs_inode_extref_index(eb, extref); @@ -1255,18 +1252,17 @@ static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, } static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, - u32 *namelen, char **name, u64 *index) + struct fscrypt_str *name, u64 *index) { struct btrfs_inode_ref *ref; + int ret; ref = (struct btrfs_inode_ref *)ref_ptr; - *namelen = btrfs_inode_ref_name_len(eb, ref); - *name = kmalloc(*namelen, GFP_NOFS); - if (*name == NULL) - return -ENOMEM; - - read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen); + ret = read_alloc_one_name(eb, ref + 1, btrfs_inode_ref_name_len(eb, ref), + name); + if (ret) + return ret; if (index) *index = btrfs_inode_ref_index(eb, ref); @@ -1308,28 +1304,24 @@ again: ref_ptr = btrfs_item_ptr_offset(eb, path->slots[0]); ref_end = ref_ptr + btrfs_item_size(eb, path->slots[0]); while (ref_ptr < ref_end) { - char *name = NULL; - int namelen; + struct fscrypt_str name; u64 parent_id; if (key->type == BTRFS_INODE_EXTREF_KEY) { - ret = extref_get_fields(eb, ref_ptr, &namelen, &name, + ret = extref_get_fields(eb, ref_ptr, &name, NULL, &parent_id); } else { parent_id = key->offset; - ret = ref_get_fields(eb, ref_ptr, &namelen, &name, - NULL); + ret = ref_get_fields(eb, ref_ptr, &name, NULL); } if (ret) goto out; if (key->type == BTRFS_INODE_EXTREF_KEY) ret = !!btrfs_find_name_in_ext_backref(log_eb, log_slot, - parent_id, name, - namelen); + parent_id, &name); else - ret = !!btrfs_find_name_in_backref(log_eb, log_slot, - name, 
namelen); + ret = !!btrfs_find_name_in_backref(log_eb, log_slot, &name); if (!ret) { struct inode *dir; @@ -1338,20 +1330,20 @@ again: dir = read_one_inode(root, parent_id); if (!dir) { ret = -ENOENT; - kfree(name); + kfree(name.name); goto out; } ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), - inode, name, namelen); - kfree(name); + inode, &name); + kfree(name.name); iput(dir); if (ret) goto out; goto again; } - kfree(name); - ref_ptr += namelen; + kfree(name.name); + ref_ptr += name.len; if (key->type == BTRFS_INODE_EXTREF_KEY) ref_ptr += sizeof(struct btrfs_inode_extref); else @@ -1380,8 +1372,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, struct inode *inode = NULL; unsigned long ref_ptr; unsigned long ref_end; - char *name = NULL; - int namelen; + struct fscrypt_str name; int ret; int log_ref_ver = 0; u64 parent_objectid; @@ -1425,7 +1416,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, while (ref_ptr < ref_end) { if (log_ref_ver) { - ret = extref_get_fields(eb, ref_ptr, &namelen, &name, + ret = extref_get_fields(eb, ref_ptr, &name, &ref_index, &parent_objectid); /* * parent object can change from one array @@ -1438,15 +1429,13 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, goto out; } } else { - ret = ref_get_fields(eb, ref_ptr, &namelen, &name, - &ref_index); + ret = ref_get_fields(eb, ref_ptr, &name, &ref_index); } if (ret) goto out; ret = inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)), - btrfs_ino(BTRFS_I(inode)), ref_index, - name, namelen); + btrfs_ino(BTRFS_I(inode)), ref_index, &name); if (ret < 0) { goto out; } else if (ret == 0) { @@ -1460,7 +1449,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, ret = __add_inode_ref(trans, root, path, log, BTRFS_I(dir), BTRFS_I(inode), inode_objectid, parent_objectid, - ref_index, name, namelen); + ref_index, &name); if (ret) { if (ret == 1) ret = 0; @@ -1469,7 +1458,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, /* insert our name */ ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), - name, namelen, 0, ref_index); + &name, 0, ref_index); if (ret) goto out; @@ -1479,9 +1468,9 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, } /* Else, ret == 1, we already have a perfect match, we're done. 
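 * Note that the fscrypt_str buffer is owned by this loop: name.name is
 * freed and reset to NULL below before the next ref is parsed, and the
 * cursor advances by the fixed struct size plus name.len.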
*/ - ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen; - kfree(name); - name = NULL; + ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + name.len; + kfree(name.name); + name.name = NULL; if (log_ref_ver) { iput(dir); dir = NULL; @@ -1505,7 +1494,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, ret = overwrite_item(trans, root, path, eb, slot, key); out: btrfs_release_path(path); - kfree(name); + kfree(name.name); iput(dir); iput(inode); return ret; @@ -1777,7 +1766,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, static noinline int insert_one_name(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 dirid, u64 index, - char *name, int name_len, + const struct fscrypt_str *name, struct btrfs_key *location) { struct inode *inode; @@ -1795,7 +1784,7 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans, } ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name, - name_len, 1, index); + 1, index); /* FIXME, put inode into FIXUP list */ @@ -1855,8 +1844,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, struct btrfs_dir_item *di, struct btrfs_key *key) { - char *name; - int name_len; + struct fscrypt_str name; struct btrfs_dir_item *dir_dst_di; struct btrfs_dir_item *index_dst_di; bool dir_dst_matches = false; @@ -1874,17 +1862,11 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, if (!dir) return -EIO; - name_len = btrfs_dir_name_len(eb, di); - name = kmalloc(name_len, GFP_NOFS); - if (!name) { - ret = -ENOMEM; + ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name); + if (ret) goto out; - } log_type = btrfs_dir_type(eb, di); - read_extent_buffer(eb, name, (unsigned long)(di + 1), - name_len); - btrfs_dir_item_key_to_cpu(eb, di, &log_key); ret = btrfs_lookup_inode(trans, root, path, &log_key, 0); btrfs_release_path(path); @@ -1894,7 +1876,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, ret = 0; dir_dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, - name, name_len, 1); + &name, 1); if (IS_ERR(dir_dst_di)) { ret = PTR_ERR(dir_dst_di); goto out; @@ -1911,7 +1893,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, index_dst_di = btrfs_lookup_dir_index_item(trans, root, path, key->objectid, key->offset, - name, name_len, 1); + &name, 1); if (IS_ERR(index_dst_di)) { ret = PTR_ERR(index_dst_di); goto out; @@ -1939,7 +1921,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, search_key.objectid = log_key.objectid; search_key.type = BTRFS_INODE_REF_KEY; search_key.offset = key->objectid; - ret = backref_in_log(root->log_root, &search_key, 0, name, name_len); + ret = backref_in_log(root->log_root, &search_key, 0, &name); if (ret < 0) { goto out; } else if (ret) { @@ -1952,8 +1934,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, search_key.objectid = log_key.objectid; search_key.type = BTRFS_INODE_EXTREF_KEY; search_key.offset = key->objectid; - ret = backref_in_log(root->log_root, &search_key, key->objectid, name, - name_len); + ret = backref_in_log(root->log_root, &search_key, key->objectid, &name); if (ret < 0) { goto out; } else if (ret) { @@ -1964,7 +1945,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, } btrfs_release_path(path); ret = insert_one_name(trans, root, key->objectid, key->offset, - name, name_len, &log_key); + &name, &log_key); if (ret && ret != -ENOENT && ret != -EEXIST) goto 
out; if (!ret) @@ -1974,10 +1955,10 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, out: if (!ret && update_size) { - btrfs_i_size_write(BTRFS_I(dir), dir->i_size + name_len * 2); + btrfs_i_size_write(BTRFS_I(dir), dir->i_size + name.len * 2); ret = btrfs_update_inode(trans, root, BTRFS_I(dir)); } - kfree(name); + kfree(name.name); iput(dir); if (!ret && name_added) ret = 1; @@ -2143,8 +2124,7 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, struct extent_buffer *eb; int slot; struct btrfs_dir_item *di; - int name_len; - char *name; + struct fscrypt_str name; struct inode *inode = NULL; struct btrfs_key location; @@ -2159,22 +2139,16 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, eb = path->nodes[0]; slot = path->slots[0]; di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); - name_len = btrfs_dir_name_len(eb, di); - name = kmalloc(name_len, GFP_NOFS); - if (!name) { - ret = -ENOMEM; + ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name); + if (ret) goto out; - } - - read_extent_buffer(eb, name, (unsigned long)(di + 1), name_len); if (log) { struct btrfs_dir_item *log_di; log_di = btrfs_lookup_dir_index_item(trans, log, log_path, dir_key->objectid, - dir_key->offset, - name, name_len, 0); + dir_key->offset, &name, 0); if (IS_ERR(log_di)) { ret = PTR_ERR(log_di); goto out; @@ -2200,7 +2174,7 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, inc_nlink(inode); ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), BTRFS_I(inode), - name, name_len); + &name); /* * Unlike dir item keys, dir index keys can only have one name (entry) in * them, as there are no key collisions since each key has a unique offset @@ -2209,7 +2183,7 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, out: btrfs_release_path(path); btrfs_release_path(log_path); - kfree(name); + kfree(name.name); iput(inode); return ret; } @@ -3443,7 +3417,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans, struct btrfs_root *log, struct btrfs_path *path, u64 dir_ino, - const char *name, int name_len, + const struct fscrypt_str *name, u64 index) { struct btrfs_dir_item *di; @@ -3453,7 +3427,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans, * for dir item keys. 
*/ di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino, - index, name, name_len, -1); + index, name, -1); if (IS_ERR(di)) return PTR_ERR(di); else if (!di) @@ -3490,7 +3464,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans, */ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, - const char *name, int name_len, + const struct fscrypt_str *name, struct btrfs_inode *dir, u64 index) { struct btrfs_path *path; @@ -3517,7 +3491,7 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, } ret = del_logged_dentry(trans, root->log_root, path, btrfs_ino(dir), - name, name_len, index); + name, index); btrfs_free_path(path); out_unlock: mutex_unlock(&dir->log_mutex); @@ -3529,7 +3503,7 @@ out_unlock: /* see comments for btrfs_del_dir_entries_in_log */ void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, - const char *name, int name_len, + const struct fscrypt_str *name, struct btrfs_inode *inode, u64 dirid) { struct btrfs_root *log; @@ -3550,7 +3524,7 @@ void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, log = root->log_root; mutex_lock(&inode->log_mutex); - ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode), + ret = btrfs_del_inode_ref(trans, log, name, btrfs_ino(inode), dirid, &index); mutex_unlock(&inode->log_mutex); if (ret < 0 && ret != -ENOENT) @@ -5293,6 +5267,7 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb, u32 this_len; unsigned long name_ptr; struct btrfs_dir_item *di; + struct fscrypt_str name_str; if (key->type == BTRFS_INODE_REF_KEY) { struct btrfs_inode_ref *iref; @@ -5326,8 +5301,11 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb, } read_extent_buffer(eb, name, name_ptr, this_name_len); + + name_str.name = name; + name_str.len = this_name_len; di = btrfs_lookup_dir_item(NULL, inode->root, search_path, - parent, name, this_name_len, 0); + parent, &name_str, 0); if (di && !IS_ERR(di)) { struct btrfs_key di_key; @@ -7493,9 +7471,14 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, if (old_dir && old_dir->logged_trans == trans->transid) { struct btrfs_root *log = old_dir->root->log_root; struct btrfs_path *path; + struct fscrypt_name fname; ASSERT(old_dir_index >= BTRFS_DIR_START_INDEX); + ret = fscrypt_setup_filename(&old_dir->vfs_inode, + &old_dentry->d_name, 0, &fname); + if (ret) + goto out; /* * We have two inodes to update in the log, the old directory and * the inode that got renamed, so we must pin the log to prevent @@ -7508,13 +7491,17 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, * not fail, but if it does, it's not serious, just bail out and * mark the log for a full commit. 
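 * Note that every early exit below must also release the fscrypt_name
 * set up by fscrypt_setup_filename() above, which is why each error
 * path calls fscrypt_free_filename(&fname) before jumping to out.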
*/ - if (WARN_ON_ONCE(ret < 0)) + if (WARN_ON_ONCE(ret < 0)) { + fscrypt_free_filename(&fname); goto out; + } + log_pinned = true; path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; + fscrypt_free_filename(&fname); goto out; } @@ -7530,8 +7517,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, */ mutex_lock(&old_dir->log_mutex); ret = del_logged_dentry(trans, log, path, btrfs_ino(old_dir), - old_dentry->d_name.name, - old_dentry->d_name.len, old_dir_index); + &fname.disk_name, old_dir_index); if (ret > 0) { /* * The dentry does not exist in the log, so record its @@ -7545,6 +7531,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, mutex_unlock(&old_dir->log_mutex); btrfs_free_path(path); + fscrypt_free_filename(&fname); if (ret < 0) goto out; } diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index bcca74128c3bb..8adebf4c9adaf 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -84,11 +84,11 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, struct btrfs_log_ctx *ctx); void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, - const char *name, int name_len, + const struct fscrypt_str *name, struct btrfs_inode *dir, u64 index); void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, - const char *name, int name_len, + const struct fscrypt_str *name, struct btrfs_inode *inode, u64 dirid); void btrfs_end_log_trans(struct btrfs_root *root); void btrfs_pin_log_trans(struct btrfs_root *root); diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 5cd612a8f8584..49addc345aebe 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -217,9 +217,12 @@ again: strm->buf.out_size = min_t(u32, outlen, PAGE_SIZE - pageofs); outlen -= strm->buf.out_size; - if (!rq->out[no] && rq->fillgaps) /* deduped */ + if (!rq->out[no] && rq->fillgaps) { /* deduped */ rq->out[no] = erofs_allocpage(pagepool, GFP_KERNEL | __GFP_NOFAIL); + set_page_private(rq->out[no], + Z_EROFS_SHORTLIVED_PAGE); + } if (rq->out[no]) strm->buf.out = kmap(rq->out[no]) + pageofs; pageofs = 0; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index be570c65ae154..e1297c6bcfbe2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7157,7 +7157,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) { struct nfs4_lockdata *data = calldata; struct nfs4_lock_state *lsp = data->lsp; - struct nfs_server *server = NFS_SERVER(d_inode(data->ctx->dentry)); if (!nfs4_sequence_done(task, &data->res.seq_res)) return; @@ -7165,7 +7164,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) data->rpc_status = task->tk_status; switch (task->tk_status) { case 0: - renew_lease(server, data->timestamp); + renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)), + data->timestamp); if (data->arg.new_lock && !data->cancelled) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) @@ -7193,8 +7193,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) if (!nfs4_stateid_match(&data->arg.open_stateid, &lsp->ls_state->open_stateid)) goto out_restart; - else if (nfs4_async_handle_error(task, server, lsp->ls_state, NULL) == -EAGAIN) - goto out_restart; } else if (!nfs4_stateid_match(&data->arg.lock_stateid, &lsp->ls_stateid)) goto out_restart; @@ -10629,7 +10627,9 @@ static void nfs4_disable_swap(struct inode *inode) */ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; - 
nfs4_schedule_state_manager(clp); + set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); + wake_up_var(&clp->cl_state); } static const struct inode_operations nfs4_dir_inode_operations = { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5b49e5365bb30..457b2b2f804ab 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1209,17 +1209,23 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) { struct task_struct *task; char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; - struct rpc_clnt *cl = clp->cl_rpcclient; - - while (cl != cl->cl_parent) - cl = cl->cl_parent; + struct rpc_clnt *clnt = clp->cl_rpcclient; + bool swapon = false; set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); - if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) { - wake_up_var(&clp->cl_state); - return; + + if (atomic_read(&clnt->cl_swapper)) { + swapon = !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, + &clp->cl_state); + if (!swapon) { + wake_up_var(&clp->cl_state); + return; + } } - set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); + + if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) + return; + __module_get(THIS_MODULE); refcount_inc(&clp->cl_count); @@ -1236,8 +1242,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) __func__, PTR_ERR(task)); if (!nfs_client_init_is_complete(clp)) nfs_mark_client_ready(clp, PTR_ERR(task)); + if (swapon) + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); nfs4_clear_state_manager_bit(clp); - clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); nfs_put_client(clp); module_put(THIS_MODULE); } @@ -2703,6 +2710,13 @@ static void nfs4_state_manager(struct nfs_client *clp) nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); + if (test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) && + !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, + &clp->cl_state)) { + memflags = memalloc_nofs_save(); + continue; + } + if (!test_and_set_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state)) { if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) { nfs_client_return_marked_delegations(clp); @@ -2741,22 +2755,25 @@ static int nfs4_run_state_manager(void *ptr) allow_signal(SIGKILL); again: - set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); nfs4_state_manager(clp); - if (atomic_read(&cl->cl_swapper)) { + + if (test_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) && + !test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) { wait_var_event_interruptible(&clp->cl_state, test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state)); - if (atomic_read(&cl->cl_swapper) && - test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state)) + if (!atomic_read(&cl->cl_swapper)) + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); + if (refcount_read(&clp->cl_count) > 1 && !signalled() && + !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) goto again; /* Either no longer a swapper, or were signalled */ + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); } - clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); if (refcount_read(&clp->cl_count) > 1 && !signalled() && test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) && - !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state)) + !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) goto again; nfs_put_client(clp); diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index a6f7403669631..edb535a0ff973 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -18,7 +18,7 @@ #include "sysfs.h" struct kobject *nfs_client_kobj; -static struct kset 
*nfs_client_kset; +static struct kset *nfs_kset; static void nfs_netns_object_release(struct kobject *kobj) { @@ -55,13 +55,13 @@ static struct kobject *nfs_netns_object_alloc(const char *name, int nfs_sysfs_init(void) { - nfs_client_kset = kset_create_and_add("nfs", NULL, fs_kobj); - if (!nfs_client_kset) + nfs_kset = kset_create_and_add("nfs", NULL, fs_kobj); + if (!nfs_kset) return -ENOMEM; - nfs_client_kobj = nfs_netns_object_alloc("net", nfs_client_kset, NULL); + nfs_client_kobj = nfs_netns_object_alloc("net", nfs_kset, NULL); if (!nfs_client_kobj) { - kset_unregister(nfs_client_kset); - nfs_client_kset = NULL; + kset_unregister(nfs_kset); + nfs_kset = NULL; return -ENOMEM; } return 0; @@ -70,7 +70,7 @@ int nfs_sysfs_init(void) void nfs_sysfs_exit(void) { kobject_put(nfs_client_kobj); - kset_unregister(nfs_client_kset); + kset_unregister(nfs_kset); } static ssize_t nfs_netns_identifier_show(struct kobject *kobj, @@ -159,7 +159,7 @@ static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent, p = kzalloc(sizeof(*p), GFP_KERNEL); if (p) { p->net = net; - p->kobject.kset = nfs_client_kset; + p->kobject.kset = nfs_kset; if (kobject_init_and_add(&p->kobject, &nfs_netns_client_type, parent, "nfs_client") == 0) return p; diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 57da4f23c1e43..acb8951eb7576 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -2901,9 +2901,9 @@ bind_socket(struct TCP_Server_Info *server) if (server->srcaddr.ss_family != AF_UNSPEC) { /* Bind to the specified local IP address */ struct socket *socket = server->ssocket; - rc = socket->ops->bind(socket, - (struct sockaddr *) &server->srcaddr, - sizeof(server->srcaddr)); + rc = kernel_bind(socket, + (struct sockaddr *) &server->srcaddr, + sizeof(server->srcaddr)); if (rc < 0) { struct sockaddr_in *saddr4; struct sockaddr_in6 *saddr6; @@ -3050,8 +3050,8 @@ generic_ip_connect(struct TCP_Server_Info *server) socket->sk->sk_sndbuf, socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo); - rc = socket->ops->connect(socket, saddr, slen, - server->noblockcnt ? O_NONBLOCK : 0); + rc = kernel_connect(socket, saddr, slen, + server->noblockcnt ? O_NONBLOCK : 0); /* * When mounting SMB root file systems, we do not want to block in * connect. 
Otherwise bail out and then let cifs_reconnect() perform diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index e1d2be19cddfa..ff97cad8d5b45 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -84,6 +84,8 @@ struct ksmbd_conn *ksmbd_conn_alloc(void) spin_lock_init(&conn->llist_lock); INIT_LIST_HEAD(&conn->lock_list); + init_rwsem(&conn->session_lock); + down_write(&conn_list_lock); list_add(&conn->conns_list, &conn_list); up_write(&conn_list_lock); diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index ad8dfaa48ffb3..335fdd714d595 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -50,6 +50,7 @@ struct ksmbd_conn { struct nls_table *local_nls; struct unicode_map *um; struct list_head conns_list; + struct rw_semaphore session_lock; /* smb session 1 per user */ struct xarray sessions; unsigned long last_active; diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index ea4b56d570fbb..cf6621e21ba36 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -183,7 +183,7 @@ static void ksmbd_expire_session(struct ksmbd_conn *conn) unsigned long id; struct ksmbd_session *sess; - down_write(&sessions_table_lock); + down_write(&conn->session_lock); xa_for_each(&conn->sessions, id, sess) { if (sess->state != SMB2_SESSION_VALID || time_after(jiffies, @@ -194,7 +194,7 @@ static void ksmbd_expire_session(struct ksmbd_conn *conn) continue; } } - up_write(&sessions_table_lock); + up_write(&conn->session_lock); } int ksmbd_session_register(struct ksmbd_conn *conn, @@ -236,7 +236,9 @@ void ksmbd_sessions_deregister(struct ksmbd_conn *conn) } } } + up_write(&sessions_table_lock); + down_write(&conn->session_lock); xa_for_each(&conn->sessions, id, sess) { unsigned long chann_id; struct channel *chann; @@ -253,7 +255,7 @@ void ksmbd_sessions_deregister(struct ksmbd_conn *conn) ksmbd_session_destroy(sess); } } - up_write(&sessions_table_lock); + up_write(&conn->session_lock); } struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn, @@ -261,9 +263,11 @@ struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn, { struct ksmbd_session *sess; + down_read(&conn->session_lock); sess = xa_load(&conn->sessions, id); if (sess) sess->last_active = jiffies; + up_read(&conn->session_lock); return sess; } diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index f6fd5cf976a50..683152007566c 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -8128,10 +8128,10 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work) goto err_out; } - opinfo_put(opinfo); - ksmbd_fd_put(work, fp); opinfo->op_state = OPLOCK_STATE_NONE; wake_up_interruptible_all(&opinfo->oplock_q); + opinfo_put(opinfo); + ksmbd_fd_put(work, fp); rsp->StructureSize = cpu_to_le16(24); rsp->OplockLevel = rsp_oplevel; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1ed2ec035e779..1fba826f0acef 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1065,7 +1065,7 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, static inline struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info) { - return ERR_PTR(-EOPNOTSUPP); + return NULL; } static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} #define DEFINE_BPF_DISPATCHER(name) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 15d7529ac9534..9a44de45cc1f2 100644 --- a/include/linux/ipv6.h +++ 
b/include/linux/ipv6.h @@ -33,6 +33,7 @@ struct ipv6_devconf { __s32 accept_ra_defrtr; __u32 ra_defrtr_metric; __s32 accept_ra_min_hop_limit; + __s32 accept_ra_min_lft; __s32 accept_ra_pinfo; __s32 ignore_routes_with_linkdown; #ifdef CONFIG_IPV6_ROUTER_PREF diff --git a/include/linux/mm.h b/include/linux/mm.h index 104ec00823da8..eefb0948110ae 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1906,6 +1906,8 @@ static inline bool can_do_mlock(void) { return false; } extern int user_shm_lock(size_t, struct ucounts *); extern void user_shm_unlock(size_t, struct ucounts *); +struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr, + pte_t pte); struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, diff --git a/include/linux/netfilter/nf_conntrack_sctp.h b/include/linux/netfilter/nf_conntrack_sctp.h index 625f491b95de8..fb31312825ae5 100644 --- a/include/linux/netfilter/nf_conntrack_sctp.h +++ b/include/linux/netfilter/nf_conntrack_sctp.h @@ -9,6 +9,7 @@ struct ip_ct_sctp { enum sctp_conntrack state; __be32 vtag[IP_CT_DIR_MAX]; + u8 init[IP_CT_DIR_MAX]; u8 last_dir; u8 flags; }; diff --git a/include/linux/regulator/mt6358-regulator.h b/include/linux/regulator/mt6358-regulator.h index bdcf83cd719ef..be9f61e3e8e6d 100644 --- a/include/linux/regulator/mt6358-regulator.h +++ b/include/linux/regulator/mt6358-regulator.h @@ -48,8 +48,6 @@ enum { MT6358_ID_VLDO28, MT6358_ID_VAUD28, MT6358_ID_VSIM2, - MT6358_ID_VCORE_SSHUB, - MT6358_ID_VSRAM_OTHERS_SSHUB, MT6358_ID_RG_MAX, }; @@ -90,8 +88,6 @@ enum { MT6366_ID_VMC, MT6366_ID_VAUD28, MT6366_ID_VSIM2, - MT6366_ID_VCORE_SSHUB, - MT6366_ID_VSRAM_OTHERS_SSHUB, MT6366_ID_RG_MAX, }; diff --git a/include/net/arp.h b/include/net/arp.h index d7ef4ec71dfeb..e8747e0713c79 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -38,11 +38,11 @@ static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 { struct neighbour *n; - rcu_read_lock_bh(); + rcu_read_lock(); n = __ipv4_neigh_lookup_noref(dev, key); if (n && !refcount_inc_not_zero(&n->refcnt)) n = NULL; - rcu_read_unlock_bh(); + rcu_read_unlock(); return n; } @@ -51,10 +51,10 @@ static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key) { struct neighbour *n; - rcu_read_lock_bh(); + rcu_read_lock(); n = __ipv4_neigh_lookup_noref(dev, key); neigh_confirm(n); - rcu_read_unlock_bh(); + rcu_read_unlock(); } void arp_init(void); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5976545aa26b9..7a6c3059d50b5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5621,12 +5621,17 @@ struct cfg80211_cqm_config; * wiphy_lock - lock the wiphy * @wiphy: the wiphy to lock * - * This is mostly exposed so it can be done around registering and - * unregistering netdevs that aren't created through cfg80211 calls, - * since that requires locking in cfg80211 when the notifiers is - * called, but that cannot differentiate which way it's called. + * This is needed around registering and unregistering netdevs that + * aren't created through cfg80211 calls, since that requires locking + * in cfg80211 when the notifiers is called, but that cannot + * differentiate which way it's called. + * + * It can also be used by drivers for their own purposes. * * When cfg80211 ops are called, the wiphy is already locked. + * + * Note that this makes sure that no workers that have been queued + * with wiphy_queue_work() are running. 
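+ *
+ * As an illustrative sketch only (dev is a hypothetical driver-owned
+ * netdev not created through cfg80211), such a registration could be
+ * wrapped as:
+ *
+ *	rtnl_lock();
+ *	wiphy_lock(wiphy);
+ *	ret = register_netdevice(dev);
+ *	wiphy_unlock(wiphy);
+ *	rtnl_unlock();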
*/ static inline void wiphy_lock(struct wiphy *wiphy) __acquires(&wiphy->mtx) @@ -5646,6 +5651,88 @@ static inline void wiphy_unlock(struct wiphy *wiphy) mutex_unlock(&wiphy->mtx); } +struct wiphy_work; +typedef void (*wiphy_work_func_t)(struct wiphy *, struct wiphy_work *); + +struct wiphy_work { + struct list_head entry; + wiphy_work_func_t func; +}; + +static inline void wiphy_work_init(struct wiphy_work *work, + wiphy_work_func_t func) +{ + INIT_LIST_HEAD(&work->entry); + work->func = func; +} + +/** + * wiphy_work_queue - queue work for the wiphy + * @wiphy: the wiphy to queue for + * @work: the work item + * + * This is useful for work that must be done asynchronously, and work + * queued here has the special property that the wiphy mutex will be + * held as if wiphy_lock() was called, and that it cannot be running + * after wiphy_lock() was called. Therefore, wiphy_cancel_work() can + * use just cancel_work() instead of cancel_work_sync(), it requires + * being in a section protected by wiphy_lock(). + */ +void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work); + +/** + * wiphy_work_cancel - cancel previously queued work + * @wiphy: the wiphy, for debug purposes + * @work: the work to cancel + * + * Cancel the work *without* waiting for it, this assumes being + * called under the wiphy mutex acquired by wiphy_lock(). + */ +void wiphy_work_cancel(struct wiphy *wiphy, struct wiphy_work *work); + +struct wiphy_delayed_work { + struct wiphy_work work; + struct wiphy *wiphy; + struct timer_list timer; +}; + +void wiphy_delayed_work_timer(struct timer_list *t); + +static inline void wiphy_delayed_work_init(struct wiphy_delayed_work *dwork, + wiphy_work_func_t func) +{ + timer_setup(&dwork->timer, wiphy_delayed_work_timer, 0); + wiphy_work_init(&dwork->work, func); +} + +/** + * wiphy_delayed_work_queue - queue delayed work for the wiphy + * @wiphy: the wiphy to queue for + * @dwork: the delayable worker + * @delay: number of jiffies to wait before queueing + * + * This is useful for work that must be done asynchronously, and work + * queued here has the special property that the wiphy mutex will be + * held as if wiphy_lock() was called, and that it cannot be running + * after wiphy_lock() was called. Therefore, wiphy_cancel_work() can + * use just cancel_work() instead of cancel_work_sync(), it requires + * being in a section protected by wiphy_lock(). + */ +void wiphy_delayed_work_queue(struct wiphy *wiphy, + struct wiphy_delayed_work *dwork, + unsigned long delay); + +/** + * wiphy_delayed_work_cancel - cancel previously queued delayed work + * @wiphy: the wiphy, for debug purposes + * @dwork: the delayed work to cancel + * + * Cancel the work *without* waiting for it, this assumes being + * called under the wiphy mutex acquired by wiphy_lock(). 
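+ *
+ * A minimal usage sketch (drv and drv_scan_timeout are hypothetical
+ * driver names, not part of this API):
+ *
+ *	wiphy_delayed_work_init(&drv->scan_timeout, drv_scan_timeout);
+ *	wiphy_delayed_work_queue(wiphy, &drv->scan_timeout, 5 * HZ);
+ *	...
+ *	wiphy_lock(wiphy);
+ *	wiphy_delayed_work_cancel(wiphy, &drv->scan_timeout);
+ *	wiphy_unlock(wiphy);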
+ */ +void wiphy_delayed_work_cancel(struct wiphy *wiphy, + struct wiphy_delayed_work *dwork); + /** * struct wireless_dev - wireless device state * @@ -5718,6 +5805,7 @@ static inline void wiphy_unlock(struct wiphy *wiphy) * @event_lock: (private) lock for event list * @owner_nlportid: (private) owner socket port ID * @nl_owner_dead: (private) owner socket went away + * @cqm_rssi_work: (private) CQM RSSI reporting work * @cqm_config: (private) nl80211 RSSI monitor state * @pmsr_list: (private) peer measurement requests * @pmsr_lock: (private) peer measurements requests/results lock @@ -5790,7 +5878,8 @@ struct wireless_dev { } wext; #endif - struct cfg80211_cqm_config *cqm_config; + struct wiphy_work cqm_rssi_work; + struct cfg80211_cqm_config __rcu *cqm_config; struct list_head pmsr_list; spinlock_t pmsr_lock; diff --git a/include/net/ndisc.h b/include/net/ndisc.h index da7eec8669ec4..325a6fb65c896 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -395,11 +395,11 @@ static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, cons { struct neighbour *n; - rcu_read_lock_bh(); + rcu_read_lock(); n = __ipv6_neigh_lookup_noref(dev, pkey); if (n && !refcount_inc_not_zero(&n->refcnt)) n = NULL; - rcu_read_unlock_bh(); + rcu_read_unlock(); return n; } @@ -409,10 +409,10 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev, { struct neighbour *n; - rcu_read_lock_bh(); + rcu_read_lock(); n = __ipv6_neigh_lookup_noref(dev, pkey); neigh_confirm(n); - rcu_read_unlock_bh(); + rcu_read_unlock(); } static inline void __ipv6_confirm_neigh_stub(struct net_device *dev, @@ -420,10 +420,10 @@ static inline void __ipv6_confirm_neigh_stub(struct net_device *dev, { struct neighbour *n; - rcu_read_lock_bh(); + rcu_read_lock(); n = __ipv6_neigh_lookup_noref_stub(dev, pkey); neigh_confirm(n); - rcu_read_unlock_bh(); + rcu_read_unlock(); } /* uses ipv6_stub and is meant for use outside of IPv6 core */ diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 794e45981891a..ccc4a0f8b4ad8 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -299,14 +299,14 @@ static inline struct neighbour *___neigh_lookup_noref( const void *pkey, struct net_device *dev) { - struct neigh_hash_table *nht = rcu_dereference_bh(tbl->nht); + struct neigh_hash_table *nht = rcu_dereference(tbl->nht); struct neighbour *n; u32 hash_val; hash_val = hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); - for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); + for (n = rcu_dereference(nht->hash_buckets[hash_val]); n != NULL; - n = rcu_dereference_bh(n->next)) { + n = rcu_dereference(n->next)) { if (n->dev == dev && key_eq(n, pkey)) return n; } @@ -464,7 +464,7 @@ static __always_inline int neigh_event_send_probe(struct neighbour *neigh, if (READ_ONCE(neigh->used) != now) WRITE_ONCE(neigh->used, now); - if (!(neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))) + if (!(READ_ONCE(neigh->nud_state) & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))) return __neigh_event_send(neigh, skb, immediate_ok); return 0; } @@ -541,7 +541,7 @@ static inline int neigh_output(struct neighbour *n, struct sk_buff *skb, READ_ONCE(hh->hh_len)) return neigh_hh_output(hh, skb); - return n->output(n, skb); + return READ_ONCE(n->output)(n, skb); } static inline struct neighbour * diff --git a/include/net/netlink.h b/include/net/netlink.h index 6bfa972f2fbf2..a686c9041ddc0 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -937,6 +937,27 @@ static inline struct nlmsghdr 
*nlmsg_put(struct sk_buff *skb, u32 portid, u32 se return __nlmsg_put(skb, portid, seq, type, payload, flags); } +/** + * nlmsg_append - Add more data to a nlmsg in a skb + * @skb: socket buffer to store message in + * @size: length of message payload + * + * Append data to an existing nlmsg, used when constructing a message + * with multiple fixed-format headers (which is rare). + * Returns NULL if the tailroom of the skb is insufficient to store + * the extra payload. + */ +static inline void *nlmsg_append(struct sk_buff *skb, u32 size) +{ + if (unlikely(skb_tailroom(skb) < NLMSG_ALIGN(size))) + return NULL; + + if (NLMSG_ALIGN(size) - size) + memset(skb_tail_pointer(skb) + size, 0, + NLMSG_ALIGN(size) - size); + return __skb_put(skb, NLMSG_ALIGN(size)); +} + /** * nlmsg_put_answer - Add a new callback based netlink message to an skb * @skb: socket buffer to store message in diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 28085b995ddcf..2b12725de9c09 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -497,29 +497,6 @@ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) return NULL; } -/* Variant of nexthop_fib6_nh(). - * Caller should either hold rcu_read_lock_bh(), or RTNL. - */ -static inline struct fib6_nh *nexthop_fib6_nh_bh(struct nexthop *nh) -{ - struct nh_info *nhi; - - if (nh->is_group) { - struct nh_group *nh_grp; - - nh_grp = rcu_dereference_bh_rtnl(nh->nh_grp); - nh = nexthop_mpath_select(nh_grp, 0); - if (!nh) - return NULL; - } - - nhi = rcu_dereference_bh_rtnl(nh->nh_info); - if (nhi->family == AF_INET6) - return &nhi->fib6_nh; - - return NULL; -} - static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i) { struct fib6_nh *fib6_nh; diff --git a/include/net/tcp.h b/include/net/tcp.h index 5fd69f2342a44..9ebb54122bb71 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -355,12 +355,14 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, bool force_schedule); -static inline void tcp_dec_quickack_mode(struct sock *sk, - const unsigned int pkts) +static inline void tcp_dec_quickack_mode(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ack.quick) { + /* How many ACKs S/ACKing new data have we sent? */ + const unsigned int pkts = inet_csk_ack_scheduled(sk) ? 1 : 0; + if (pkts >= icsk->icsk_ack.quick) { icsk->icsk_ack.quick = 0; /* Leaving quickack mode we deflate ATO. */ diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 006858ed04e8c..dc2cff18b68bd 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -161,6 +161,10 @@ struct scsi_device { * pass settings from slave_alloc to scsi * core. */ unsigned int eh_timeout; /* Error handling timeout */ + + bool manage_system_start_stop; /* Let HLD (sd) manage system start/stop */ + bool manage_runtime_start_stop; /* Let HLD (sd) manage runtime start/stop */ + unsigned removable:1; unsigned changed:1; /* Data invalid due to media change */ unsigned busy:1; /* Used to prevent races */ @@ -192,7 +196,7 @@ struct scsi_device { unsigned use_192_bytes_for_3f:1; /* ask for 192 bytes from page 0x3f */ unsigned no_start_on_add:1; /* do not issue start on add */ unsigned allow_restart:1; /* issue START_UNIT in error handler */ - unsigned manage_start_stop:1; /* Let HLD (sd) manage start/stop */ + unsigned no_start_on_resume:1; /* Do not issue START_STOP_UNIT on resume */ unsigned start_stop_pwr_cond:1; /* Set power cond. 
in START_STOP_UNIT */ unsigned no_uld_attach:1; /* disable connecting to upper level drivers */ unsigned select_no_atn:1; diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index d27d9fb7174c8..71def41b1ad78 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -752,7 +752,7 @@ extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *, struct device *, struct device *); extern void scsi_scan_host(struct Scsi_Host *); -extern void scsi_rescan_device(struct device *); +extern int scsi_rescan_device(struct scsi_device *sdev); extern void scsi_remove_host(struct Scsi_Host *); extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *); extern int scsi_host_busy(struct Scsi_Host *shost); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 53bc487947197..92dbe89dafbf5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3112,6 +3112,11 @@ union bpf_attr { * **BPF_FIB_LOOKUP_OUTPUT** * Perform lookup from an egress perspective (default is * ingress). + * **BPF_FIB_LOOKUP_SKIP_NEIGH** + * Skip the neighbour table lookup. *params*->dmac + * and *params*->smac will not be set as output. A common + * use case is to call **bpf_redirect_neigh**\ () after + * doing **bpf_fib_lookup**\ (). * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -6678,6 +6683,7 @@ struct bpf_raw_tracepoint_args { enum { BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_OUTPUT = (1U << 1), + BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), }; enum { diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 53326dfc59ecb..4fa8511b1e355 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -198,6 +198,7 @@ enum { DEVCONF_IOAM6_ID_WIDE, DEVCONF_NDISC_EVICT_NOCARRIER, DEVCONF_ACCEPT_UNTRACKED_NA, + DEVCONF_ACCEPT_RA_MIN_LFT, DEVCONF_MAX }; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2f562cf961e0a..b7383358c4ea1 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -354,10 +354,11 @@ static void rb_init_page(struct buffer_data_page *bpage) local_set(&bpage->commit, 0); } -/* - * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing - * this issue out. - */ +static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage) +{ + return local_read(&bpage->page->commit); +} + static void free_buffer_page(struct buffer_page *bpage) { free_page((unsigned long)bpage->page); @@ -2024,7 +2025,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) * Increment overrun to account for the lost events. */ local_add(page_entries, &cpu_buffer->overrun); - local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); } @@ -2368,11 +2369,6 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->reader_page->read); } -static __always_inline unsigned rb_page_commit(struct buffer_page *bpage) -{ - return local_read(&bpage->page->commit); -} - static struct ring_buffer_event * rb_iter_head_event(struct ring_buffer_iter *iter) { @@ -2518,7 +2514,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, * the counters. 
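 * Note that entries_bytes is decremented by rb_page_commit(next_page),
 * the number of bytes actually committed to the lost page, rather than
 * a full BUF_PAGE_SIZE, so ring_buffer_bytes_cpu() stays accurate.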
*/ local_add(entries, &cpu_buffer->overrun); - local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); /* @@ -2661,9 +2657,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, event = __rb_page_index(tail_page, tail); - /* account for padding bytes */ - local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); - /* * Save the original length to the meta data. * This will be used by the reader to add lost event @@ -2677,7 +2670,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, * write counter enough to allow another writer to slip * in on this page. * We put in a discarded commit instead, to make sure - * that this space is not used again. + * that this space is not used again, and this space will + * not be accounted into 'entries_bytes'. * * If we are less than the minimum size, we don't need to * worry about it. @@ -2702,6 +2696,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, /* time delta must be non zero */ event->time_delta = 1; + /* account for padding bytes */ + local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); + /* Make sure the padding is visible before the tail_page->write update */ smp_wmb(); @@ -4219,7 +4216,7 @@ u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu) EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); /** - * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer + * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ @@ -4729,6 +4726,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) length = rb_event_length(event); cpu_buffer->reader_page->read += length; + cpu_buffer->read_bytes += length; } static void rb_advance_iter(struct ring_buffer_iter *iter) @@ -5824,7 +5822,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer, } else { /* update the entry counter */ cpu_buffer->read += rb_page_entries(reader); - cpu_buffer->read_bytes += BUF_PAGE_SIZE; + cpu_buffer->read_bytes += rb_page_commit(reader); /* swap the pages */ rb_init_page(bpage); diff --git a/mm/memory.c b/mm/memory.c index 2083078cd0615..0d1b3ee8fcd7a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -672,6 +672,16 @@ out: return pfn_to_page(pfn); } +struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr, + pte_t pte) +{ + struct page *page = vm_normal_page(vma, addr, pte); + + if (page) + return page_folio(page); + return NULL; +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 7d36dd95d1fff..bfe2d1d50fbee 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -414,7 +414,7 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = { }, }; -static int migrate_page_add(struct page *page, struct list_head *pagelist, +static int migrate_folio_add(struct folio *folio, struct list_head *foliolist, unsigned long flags); struct queue_pages { @@ -424,6 +424,7 @@ struct queue_pages { unsigned long start; unsigned long end; struct vm_area_struct *first; + bool has_unmovable; }; /* @@ -442,21 +443,20 @@ static inline bool queue_pages_required(struct page *page, } /* - * queue_pages_pmd() has three possible return values: - * 0 - pages are placed on the right node or queued successfully, or - * special page is met, i.e. huge zero page. 
- * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were - * specified. + * queue_folios_pmd() has three possible return values: + * 0 - folios are placed on the right node or queued successfully, or + * special page is met, i.e. zero page, or unmovable page is found + * but continue walking (indicated by queue_pages.has_unmovable). * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an - * existing page was already on a node that does not follow the + * existing folio was already on a node that does not follow the * policy. */ -static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, +static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, unsigned long end, struct mm_walk *walk) __releases(ptl) { int ret = 0; - struct page *page; + struct folio *folio; struct queue_pages *qp = walk->private; unsigned long flags; @@ -464,20 +464,20 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, ret = -EIO; goto unlock; } - page = pmd_page(*pmd); - if (is_huge_zero_page(page)) { + folio = pfn_folio(pmd_pfn(*pmd)); + if (is_huge_zero_page(&folio->page)) { walk->action = ACTION_CONTINUE; goto unlock; } - if (!queue_pages_required(page, qp)) + if (!queue_pages_required(&folio->page, qp)) goto unlock; flags = qp->flags; - /* go to thp migration */ + /* go to folio migration */ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { if (!vma_migratable(walk->vma) || - migrate_page_add(page, qp->pagelist, flags)) { - ret = 1; + migrate_folio_add(folio, qp->pagelist, flags)) { + qp->has_unmovable = true; goto unlock; } } else @@ -491,28 +491,26 @@ unlock: * Scan through pages checking if pages follow certain conditions, * and move them to the pagelist if they do. * - * queue_pages_pte_range() has three possible return values: - * 0 - pages are placed on the right node or queued successfully, or - * special page is met, i.e. zero page. - * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were - * specified. - * -EIO - only MPOL_MF_STRICT was specified and an existing page was already + * queue_folios_pte_range() has three possible return values: + * 0 - folios are placed on the right node or queued successfully, or + * special page is met, i.e. zero page, or unmovable page is found + * but continue walking (indicated by queue_pages.has_unmovable). + * -EIO - only MPOL_MF_STRICT was specified and an existing folio was already * on a node that does not follow the policy. 
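+ *
+ * After the walk, queue_pages_range() turns qp.has_unmovable into a
+ * positive return value, which do_mbind() converts to -EIO only when
+ * MPOL_MF_STRICT was requested.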
*/ -static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, +static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct vm_area_struct *vma = walk->vma; - struct page *page; + struct folio *folio; struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; - bool has_unmovable = false; pte_t *pte, *mapped_pte; spinlock_t *ptl; ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) - return queue_pages_pmd(pmd, ptl, addr, end, walk); + return queue_folios_pmd(pmd, ptl, addr, end, walk); if (pmd_trans_unstable(pmd)) return 0; @@ -521,40 +519,38 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, for (; addr != end; pte++, addr += PAGE_SIZE) { if (!pte_present(*pte)) continue; - page = vm_normal_page(vma, addr, *pte); - if (!page || is_zone_device_page(page)) + folio = vm_normal_folio(vma, addr, *pte); + if (!folio || folio_is_zone_device(folio)) continue; /* - * vm_normal_page() filters out zero pages, but there might - * still be PageReserved pages to skip, perhaps in a VDSO. + * vm_normal_folio() filters out zero pages, but there might + * still be reserved folios to skip, perhaps in a VDSO. */ - if (PageReserved(page)) + if (folio_test_reserved(folio)) continue; - if (!queue_pages_required(page, qp)) + if (!queue_pages_required(&folio->page, qp)) continue; if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { - /* MPOL_MF_STRICT must be specified if we get here */ - if (!vma_migratable(vma)) { - has_unmovable = true; - break; - } + /* + * MPOL_MF_STRICT must be specified if we get here. + * Continue walking vmas due to MPOL_MF_MOVE* flags. + */ + if (!vma_migratable(vma)) + qp->has_unmovable = true; /* * Do not abort immediately since there may be * temporary off LRU pages in the range. Still * need migrate other LRU pages. */ - if (migrate_page_add(page, qp->pagelist, flags)) - has_unmovable = true; + if (migrate_folio_add(folio, qp->pagelist, flags)) + qp->has_unmovable = true; } else break; } pte_unmap_unlock(mapped_pte, ptl); cond_resched(); - if (has_unmovable) - return 1; - return addr != end ? -EIO : 0; } @@ -594,7 +590,7 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask, * Detecting misplaced page but allow migrating pages which * have been queued. */ - ret = 1; + qp->has_unmovable = true; goto unlock; } @@ -608,7 +604,7 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask, * Failed to isolate page but allow migrating pages * which have been queued. */ - ret = 1; + qp->has_unmovable = true; } unlock: spin_unlock(ptl); @@ -705,7 +701,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, static const struct mm_walk_ops queue_pages_walk_ops = { .hugetlb_entry = queue_pages_hugetlb, - .pmd_entry = queue_pages_pte_range, + .pmd_entry = queue_folios_pte_range, .test_walk = queue_pages_test_walk, }; @@ -737,10 +733,13 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, .start = start, .end = end, .first = NULL, + .has_unmovable = false, }; err = walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp); + if (qp.has_unmovable) + err = 1; if (!qp.first) /* whole range in hole */ err = -EFAULT; @@ -1012,27 +1011,28 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask, } #ifdef CONFIG_MIGRATION -/* - * page migration, thp tail pages can be passed. 
- */ -static int migrate_page_add(struct page *page, struct list_head *pagelist, +static int migrate_folio_add(struct folio *folio, struct list_head *foliolist, unsigned long flags) { - struct page *head = compound_head(page); /* - * Avoid migrating a page that is shared with others. + * We try to migrate only unshared folios. If it is shared it + * is likely not worth migrating. + * + * To check if the folio is shared, ideally we want to make sure + * every page is mapped to the same process. Doing that is very + * expensive, so check the estimated mapcount of the folio instead. */ - if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(head) == 1) { - if (!isolate_lru_page(head)) { - list_add_tail(&head->lru, pagelist); - mod_node_page_state(page_pgdat(head), - NR_ISOLATED_ANON + page_is_file_lru(head), - thp_nr_pages(head)); + if ((flags & MPOL_MF_MOVE_ALL) || folio_estimated_sharers(folio) == 1) { + if (!folio_isolate_lru(folio)) { + list_add_tail(&folio->lru, foliolist); + node_stat_mod_folio(folio, + NR_ISOLATED_ANON + folio_is_file_lru(folio), + folio_nr_pages(folio)); } else if (flags & MPOL_MF_STRICT) { /* - * Non-movable page may reach here. And, there may be - * temporary off LRU pages or non-LRU movable pages. - * Treat them as unmovable pages since they can't be + * Non-movable folio may reach here. And, there may be + * temporary off LRU folios or non-LRU movable folios. + * Treat them as unmovable folios since they can't be * isolated, so they can't be moved at the moment. It * should return -EIO for this case too. */ @@ -1224,7 +1224,7 @@ static struct page *new_page(struct page *page, unsigned long start) } #else -static int migrate_page_add(struct page *page, struct list_head *pagelist, +static int migrate_folio_add(struct folio *folio, struct list_head *foliolist, unsigned long flags) { return -EIO; @@ -1337,7 +1337,7 @@ static long do_mbind(unsigned long start, unsigned long len, putback_movable_pages(&pagelist); } - if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT))) + if (((ret > 0) || nr_failed) && (flags & MPOL_MF_STRICT)) err = -EIO; } else { up_out: diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 69668817fed37..ca017c6008b7c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -170,21 +170,12 @@ static DEFINE_MUTEX(pcp_batch_high_lock); _ret; \ }) -#define pcpu_spin_lock_irqsave(type, member, ptr, flags) \ +#define pcpu_spin_trylock(type, member, ptr) \ ({ \ type *_ret; \ pcpu_task_pin(); \ _ret = this_cpu_ptr(ptr); \ - spin_lock_irqsave(&_ret->member, flags); \ - _ret; \ -}) - -#define pcpu_spin_trylock_irqsave(type, member, ptr, flags) \ -({ \ - type *_ret; \ - pcpu_task_pin(); \ - _ret = this_cpu_ptr(ptr); \ - if (!spin_trylock_irqsave(&_ret->member, flags)) { \ + if (!spin_trylock(&_ret->member)) { \ pcpu_task_unpin(); \ _ret = NULL; \ } \ @@ -197,27 +188,16 @@ static DEFINE_MUTEX(pcp_batch_high_lock); pcpu_task_unpin(); \ }) -#define pcpu_spin_unlock_irqrestore(member, ptr, flags) \ -({ \ - spin_unlock_irqrestore(&ptr->member, flags); \ - pcpu_task_unpin(); \ -}) - /* struct per_cpu_pages specific helpers. 
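 *
 * Typical usage, mirroring free_unref_page() below (shown here only as
 * an illustration of the trylock pairing):
 *
 *	pcp_trylock_prepare(UP_flags);
 *	pcp = pcp_spin_trylock(zone->per_cpu_pageset);
 *	if (pcp) {
 *		free_unref_page_commit(zone, pcp, page, migratetype, order);
 *		pcp_spin_unlock(pcp);
 *	}
 *	pcp_trylock_finish(UP_flags);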
*/ #define pcp_spin_lock(ptr) \ pcpu_spin_lock(struct per_cpu_pages, lock, ptr) -#define pcp_spin_lock_irqsave(ptr, flags) \ - pcpu_spin_lock_irqsave(struct per_cpu_pages, lock, ptr, flags) - -#define pcp_spin_trylock_irqsave(ptr, flags) \ - pcpu_spin_trylock_irqsave(struct per_cpu_pages, lock, ptr, flags) +#define pcp_spin_trylock(ptr) \ + pcpu_spin_trylock(struct per_cpu_pages, lock, ptr) #define pcp_spin_unlock(ptr) \ pcpu_spin_unlock(lock, ptr) -#define pcp_spin_unlock_irqrestore(ptr, flags) \ - pcpu_spin_unlock_irqrestore(lock, ptr, flags) #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID DEFINE_PER_CPU(int, numa_node); EXPORT_PER_CPU_SYMBOL(numa_node); @@ -1548,6 +1528,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, struct per_cpu_pages *pcp, int pindex) { + unsigned long flags; int min_pindex = 0; int max_pindex = NR_PCP_LISTS - 1; unsigned int order; @@ -1563,8 +1544,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, /* Ensure requested pindex is drained first. */ pindex = pindex - 1; - /* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */ - spin_lock(&zone->lock); + spin_lock_irqsave(&zone->lock, flags); isolated_pageblocks = has_isolate_pageblock(zone); while (count > 0) { @@ -1612,7 +1592,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, } while (count > 0 && !list_empty(list)); } - spin_unlock(&zone->lock); + spin_unlock_irqrestore(&zone->lock, flags); } static void free_one_page(struct zone *zone, @@ -3126,10 +3106,10 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, int migratetype, unsigned int alloc_flags) { + unsigned long flags; int i, allocated = 0; - /* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */ - spin_lock(&zone->lock); + spin_lock_irqsave(&zone->lock, flags); for (i = 0; i < count; ++i) { struct page *page = __rmqueue(zone, order, migratetype, alloc_flags); @@ -3163,7 +3143,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, * pages added to the pcp list. */ __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); - spin_unlock(&zone->lock); + spin_unlock_irqrestore(&zone->lock, flags); return allocated; } @@ -3180,16 +3160,9 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) batch = READ_ONCE(pcp->batch); to_drain = min(pcp->count, batch); if (to_drain > 0) { - unsigned long flags; - - /* - * free_pcppages_bulk expects IRQs disabled for zone->lock - * so even though pcp->lock is not intended to be IRQ-safe, - * it's needed in this context. 
- */ - spin_lock_irqsave(&pcp->lock, flags); + spin_lock(&pcp->lock); free_pcppages_bulk(zone, to_drain, pcp, 0); - spin_unlock_irqrestore(&pcp->lock, flags); + spin_unlock(&pcp->lock); } } #endif @@ -3203,12 +3176,9 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone) pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); if (pcp->count) { - unsigned long flags; - - /* See drain_zone_pages on why this is disabling IRQs */ - spin_lock_irqsave(&pcp->lock, flags); + spin_lock(&pcp->lock); free_pcppages_bulk(zone, pcp->count, pcp, 0); - spin_unlock_irqrestore(&pcp->lock, flags); + spin_unlock(&pcp->lock); } } @@ -3474,12 +3444,11 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp, */ void free_unref_page(struct page *page, unsigned int order) { - unsigned long flags; unsigned long __maybe_unused UP_flags; struct per_cpu_pages *pcp; struct zone *zone; unsigned long pfn = page_to_pfn(page); - int migratetype; + int migratetype, pcpmigratetype; if (!free_unref_page_prepare(page, pfn, order)) return; @@ -3487,25 +3456,25 @@ void free_unref_page(struct page *page, unsigned int order) /* * We only track unmovable, reclaimable and movable on pcp lists. * Place ISOLATE pages on the isolated list because they are being - * offlined but treat HIGHATOMIC as movable pages so we can get those - * areas back if necessary. Otherwise, we may have to free + * offlined but treat HIGHATOMIC and CMA as movable pages so we can + * get those areas back if necessary. Otherwise, we may have to free * excessively into the page allocator */ - migratetype = get_pcppage_migratetype(page); + migratetype = pcpmigratetype = get_pcppage_migratetype(page); if (unlikely(migratetype >= MIGRATE_PCPTYPES)) { if (unlikely(is_migrate_isolate(migratetype))) { free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE); return; } - migratetype = MIGRATE_MOVABLE; + pcpmigratetype = MIGRATE_MOVABLE; } zone = page_zone(page); pcp_trylock_prepare(UP_flags); - pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags); + pcp = pcp_spin_trylock(zone->per_cpu_pageset); if (pcp) { - free_unref_page_commit(zone, pcp, page, migratetype, order); - pcp_spin_unlock_irqrestore(pcp, flags); + free_unref_page_commit(zone, pcp, page, pcpmigratetype, order); + pcp_spin_unlock(pcp); } else { free_one_page(zone, page, pfn, order, migratetype, FPI_NONE); } @@ -3517,10 +3486,10 @@ void free_unref_page(struct page *page, unsigned int order) */ void free_unref_page_list(struct list_head *list) { + unsigned long __maybe_unused UP_flags; struct page *page, *next; struct per_cpu_pages *pcp = NULL; struct zone *locked_zone = NULL; - unsigned long flags; int batch_count = 0; int migratetype; @@ -3547,20 +3516,37 @@ void free_unref_page_list(struct list_head *list) list_for_each_entry_safe(page, next, list, lru) { struct zone *zone = page_zone(page); + list_del(&page->lru); + migratetype = get_pcppage_migratetype(page); + /* Different zone, different pcp lock. */ if (zone != locked_zone) { - if (pcp) - pcp_spin_unlock_irqrestore(pcp, flags); + if (pcp) { + pcp_spin_unlock(pcp); + pcp_trylock_finish(UP_flags); + } + /* + * trylock is necessary as pages may be getting freed + * from IRQ or SoftIRQ context after an IO completion. 
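The migratetype/pcpmigratetype split above is easy to misread, so to spell out what each variable feeds: the pcp fast path files HIGHATOMIC and CMA pages under MOVABLE (pcpmigratetype), but if the trylock fails, free_one_page() must still see the real migratetype so the page lands back on its own buddy free list. Condensed from the hunk above:

	migratetype = pcpmigratetype = get_pcppage_migratetype(page);
	if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
		if (unlikely(is_migrate_isolate(migratetype))) {
			/* isolated pages never go through the pcp lists */
			free_one_page(page_zone(page), page, pfn, order,
				      migratetype, FPI_NONE);
			return;
		}
		pcpmigratetype = MIGRATE_MOVABLE;	/* pcp list choice only */
	}
	/* fast path:        free_unref_page_commit(..., pcpmigratetype, ...)
	 * trylock failure:  free_one_page(..., migratetype, ...) */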
+ */ + pcp_trylock_prepare(UP_flags); + pcp = pcp_spin_trylock(zone->per_cpu_pageset); + if (unlikely(!pcp)) { + pcp_trylock_finish(UP_flags); + free_one_page(zone, page, page_to_pfn(page), + 0, migratetype, FPI_NONE); + locked_zone = NULL; + continue; + } locked_zone = zone; - pcp = pcp_spin_lock_irqsave(locked_zone->per_cpu_pageset, flags); + batch_count = 0; } /* * Non-isolated types over MIGRATE_PCPTYPES get added * to the MIGRATE_MOVABLE pcp list. */ - migratetype = get_pcppage_migratetype(page); if (unlikely(migratetype >= MIGRATE_PCPTYPES)) migratetype = MIGRATE_MOVABLE; @@ -3568,18 +3554,23 @@ void free_unref_page_list(struct list_head *list) free_unref_page_commit(zone, pcp, page, migratetype, 0); /* - * Guard against excessive IRQ disabled times when we get - * a large list of pages to free. + * Guard against excessive lock hold times when freeing + * a large list of pages. Lock will be reacquired if + * necessary on the next iteration. */ if (++batch_count == SWAP_CLUSTER_MAX) { - pcp_spin_unlock_irqrestore(pcp, flags); + pcp_spin_unlock(pcp); + pcp_trylock_finish(UP_flags); batch_count = 0; - pcp = pcp_spin_lock_irqsave(locked_zone->per_cpu_pageset, flags); + pcp = NULL; + locked_zone = NULL; } } - if (pcp) - pcp_spin_unlock_irqrestore(pcp, flags); + if (pcp) { + pcp_spin_unlock(pcp); + pcp_trylock_finish(UP_flags); + } } /* @@ -3780,15 +3771,11 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, struct per_cpu_pages *pcp; struct list_head *list; struct page *page; - unsigned long flags; unsigned long __maybe_unused UP_flags; - /* - * spin_trylock may fail due to a parallel drain. In the future, the - * trylock will also protect against IRQ reentrancy. - */ + /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */ pcp_trylock_prepare(UP_flags); - pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags); + pcp = pcp_spin_trylock(zone->per_cpu_pageset); if (!pcp) { pcp_trylock_finish(UP_flags); return NULL; @@ -3802,7 +3789,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, pcp->free_factor >>= 1; list = &pcp->lists[order_to_pindex(migratetype, order)]; page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list); - pcp_spin_unlock_irqrestore(pcp, flags); + pcp_spin_unlock(pcp); pcp_trylock_finish(UP_flags); if (page) { __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); @@ -5373,7 +5360,6 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, struct page **page_array) { struct page *page; - unsigned long flags; unsigned long __maybe_unused UP_flags; struct zone *zone; struct zoneref *z; @@ -5455,9 +5441,9 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, if (unlikely(!zone)) goto failed; - /* Is a parallel drain in progress? */ + /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. 
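Two things happen in the list-free loop above: the trylock guards against the freeing context itself being an IRQ or softirq, and the SWAP_CLUSTER_MAX batching bounds how long the lock is held on large lists by dropping it and re-acquiring on the next iteration. The batching idiom in isolation (drain_bounded and process_one are hypothetical stand-ins; the patch uses a trylock with a free_one_page() fallback instead of the plain re-lock shown here):

	static void drain_bounded(struct list_head *list, spinlock_t *lock)
	{
		int batch = 0;

		spin_lock(lock);
		while (!list_empty(list)) {
			struct page *page = list_first_entry(list, struct page, lru);

			list_del(&page->lru);
			process_one(page);		/* hypothetical */
			if (++batch == SWAP_CLUSTER_MAX) {
				spin_unlock(lock);	/* bound the hold time */
				batch = 0;
				spin_lock(lock);
			}
		}
		spin_unlock(lock);
	}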
*/ pcp_trylock_prepare(UP_flags); - pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags); + pcp = pcp_spin_trylock(zone->per_cpu_pageset); if (!pcp) goto failed_irq; @@ -5476,7 +5462,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, if (unlikely(!page)) { /* Try and allocate at least one page */ if (!nr_account) { - pcp_spin_unlock_irqrestore(pcp, flags); + pcp_spin_unlock(pcp); goto failed_irq; } break; @@ -5491,7 +5477,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, nr_populated++; } - pcp_spin_unlock_irqrestore(pcp, flags); + pcp_spin_unlock(pcp); pcp_trylock_finish(UP_flags); __count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index fa4dd5fab0d44..d13b498f148cc 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2783,6 +2783,7 @@ void hci_release_dev(struct hci_dev *hdev) hci_conn_params_clear_all(hdev); hci_discovery_filter_clear(hdev); hci_blocked_keys_clear(hdev); + hci_codec_list_clear(&hdev->local_codecs); hci_dev_unlock(hdev); ida_simple_remove(&hci_index_ida, hdev->id); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 83eaf25ece465..e4d8857716eb7 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -32,6 +32,7 @@ #include "hci_request.h" #include "hci_debugfs.h" +#include "hci_codec.h" #include "a2mp.h" #include "amp.h" #include "smp.h" diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index b9c5a98238374..0be75cf0efed8 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -71,7 +71,5 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn); void hci_req_add_le_passive_scan(struct hci_request *req); -void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next); - void hci_request_setup(struct hci_dev *hdev); void hci_request_cancel_all(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 2ae038dfc39f7..5218c4dfe0a89 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -412,11 +412,6 @@ static int hci_le_scan_restart_sync(struct hci_dev *hdev) LE_SCAN_FILTER_DUP_ENABLE); } -static int le_scan_restart_sync(struct hci_dev *hdev, void *data) -{ - return hci_le_scan_restart_sync(hdev); -} - static void le_scan_restart(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, @@ -426,15 +421,15 @@ static void le_scan_restart(struct work_struct *work) bt_dev_dbg(hdev, ""); - hci_dev_lock(hdev); - - status = hci_cmd_sync_queue(hdev, le_scan_restart_sync, NULL, NULL); + status = hci_le_scan_restart_sync(hdev); if (status) { bt_dev_err(hdev, "failed to restart LE scan: status %d", status); - goto unlock; + return; } + hci_dev_lock(hdev); + if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) || !hdev->discovery.scan_start) goto unlock; @@ -5033,6 +5028,7 @@ int hci_dev_close_sync(struct hci_dev *hdev) memset(hdev->eir, 0, sizeof(hdev->eir)); memset(hdev->dev_class, 0, sizeof(hdev->dev_class)); bacpy(&hdev->random_addr, BDADDR_ANY); + hci_codec_list_clear(&hdev->local_codecs); hci_dev_put(hdev); return err; diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 5cd2e775915be..91e990accbf20 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -458,7 +458,7 @@ drop: } /* -------- Socket interface ---------- */ -static struct sock 
*__iso_get_sock_listen_by_addr(bdaddr_t *ba) +static struct sock *__iso_get_sock_listen_by_addr(bdaddr_t *src, bdaddr_t *dst) { struct sock *sk; @@ -466,7 +466,10 @@ static struct sock *__iso_get_sock_listen_by_addr(bdaddr_t *ba) if (sk->sk_state != BT_LISTEN) continue; - if (!bacmp(&iso_pi(sk)->src, ba)) + if (bacmp(&iso_pi(sk)->dst, dst)) + continue; + + if (!bacmp(&iso_pi(sk)->src, src)) return sk; } @@ -910,7 +913,7 @@ static int iso_listen_cis(struct sock *sk) write_lock(&iso_sk_list.lock); - if (__iso_get_sock_listen_by_addr(&iso_pi(sk)->src)) + if (__iso_get_sock_listen_by_addr(&iso_pi(sk)->src, &iso_pi(sk)->dst)) err = -EADDRINUSE; write_unlock(&iso_sk_list.lock); diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index e5e48c6e35d78..b45c00c01dea1 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -192,7 +192,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, if (n) { struct net_bridge_fdb_entry *f; - if (!(n->nud_state & NUD_VALID)) { + if (!(READ_ONCE(n->nud_state) & NUD_VALID)) { neigh_release(n); return; } @@ -452,7 +452,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, if (n) { struct net_bridge_fdb_entry *f; - if (!(n->nud_state & NUD_VALID)) { + if (!(READ_ONCE(n->nud_state) & NUD_VALID)) { neigh_release(n); return; } diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 812bd7e1750b6..01d690d9fe5f8 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -277,7 +277,8 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); int ret; - if ((neigh->nud_state & NUD_CONNECTED) && neigh->hh.hh_len) { + if ((READ_ONCE(neigh->nud_state) & NUD_CONNECTED) && + READ_ONCE(neigh->hh.hh_len)) { neigh_hh_bridge(&neigh->hh, skb); skb->dev = nf_bridge->physindev; ret = br_handle_frame_finish(net, sk, skb); @@ -293,7 +294,7 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_ /* tell br_dev_xmit to continue with forwarding */ nf_bridge->bridged_dnat = 1; /* FIXME Need to refragment */ - ret = neigh->output(neigh, skb); + ret = READ_ONCE(neigh->output)(neigh, skb); } neigh_release(neigh); return ret; diff --git a/net/core/filter.c b/net/core/filter.c index 9fd7c88b5db4e..adc327f4af1e9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2197,7 +2197,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, return -ENOMEM; } - rcu_read_lock_bh(); + rcu_read_lock(); if (!nh) { dst = skb_dst(skb); nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), @@ -2210,10 +2210,12 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, int ret; sock_confirm_neigh(skb, neigh); + local_bh_disable(); dev_xmit_recursion_inc(); ret = neigh_output(neigh, skb, false); dev_xmit_recursion_dec(); - rcu_read_unlock_bh(); + local_bh_enable(); + rcu_read_unlock(); return ret; } rcu_read_unlock_bh(); @@ -2295,7 +2297,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, return -ENOMEM; } - rcu_read_lock_bh(); + rcu_read_lock(); if (!nh) { struct dst_entry *dst = skb_dst(skb); struct rtable *rt = container_of(dst, struct rtable, dst); @@ -2307,7 +2309,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, } else if (nh->nh_family == AF_INET) { neigh = ip_neigh_gw4(dev, nh->ipv4_nh); } else { - rcu_read_unlock_bh(); + rcu_read_unlock(); goto out_drop; } @@ -2315,13 
+2317,15 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, int ret; sock_confirm_neigh(skb, neigh); + local_bh_disable(); dev_xmit_recursion_inc(); ret = neigh_output(neigh, skb, is_v6gw); dev_xmit_recursion_dec(); - rcu_read_unlock_bh(); + local_bh_enable(); + rcu_read_unlock(); return ret; } - rcu_read_unlock_bh(); + rcu_read_unlock(); out_drop: kfree_skb(skb); return -ENETDOWN; @@ -5674,12 +5678,8 @@ static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = { #endif #if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6) -static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, - const struct neighbour *neigh, - const struct net_device *dev, u32 mtu) +static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, u32 mtu) { - memcpy(params->dmac, neigh->ha, ETH_ALEN); - memcpy(params->smac, dev->dev_addr, ETH_ALEN); params->h_vlan_TCI = 0; params->h_vlan_proto = 0; if (mtu) @@ -5790,21 +5790,29 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (likely(nhc->nhc_gw_family != AF_INET6)) { if (nhc->nhc_gw_family) params->ipv4_dst = nhc->nhc_gw.ipv4; - - neigh = __ipv4_neigh_lookup_noref(dev, - (__force u32)params->ipv4_dst); } else { struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst; params->family = AF_INET6; *dst = nhc->nhc_gw.ipv6; - neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); } - if (!neigh || !(neigh->nud_state & NUD_VALID)) + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) + goto set_fwd_params; + + if (likely(nhc->nhc_gw_family != AF_INET6)) + neigh = __ipv4_neigh_lookup_noref(dev, + (__force u32)params->ipv4_dst); + else + neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst); + + if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID)) return BPF_FIB_LKUP_RET_NO_NEIGH; + memcpy(params->dmac, neigh->ha, ETH_ALEN); + memcpy(params->smac, dev->dev_addr, ETH_ALEN); - return bpf_fib_set_fwd_params(params, neigh, dev, mtu); +set_fwd_params: + return bpf_fib_set_fwd_params(params, mtu); } #endif @@ -5912,24 +5920,33 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.f6i->fib6_metric; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) + goto set_fwd_params; + /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is * not needed here. 
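For completeness, here is what the new flag looks like from the program side: with BPF_FIB_LOOKUP_SKIP_NEIGH the helper performs only the route lookup and leaves dmac/smac untouched, which is exactly what a program that merely validates reachability wants. A hedged usage sketch for an XDP program (packet-parsing setup omitted; iph is assumed to point at a validated IPv4 header):

	struct bpf_fib_lookup fib = {};
	int rc;

	fib.family   = AF_INET;
	fib.ifindex  = ctx->ingress_ifindex;
	fib.ipv4_src = iph->saddr;
	fib.ipv4_dst = iph->daddr;

	rc = bpf_fib_lookup(ctx, &fib, sizeof(fib), BPF_FIB_LOOKUP_SKIP_NEIGH);
	if (rc == BPF_FIB_LKUP_RET_SUCCESS) {
		/* route exists; fib.dmac/fib.smac were not filled in
		 * because the neighbour lookup was skipped */
	}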
*/ neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); - if (!neigh || !(neigh->nud_state & NUD_VALID)) + if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID)) return BPF_FIB_LKUP_RET_NO_NEIGH; + memcpy(params->dmac, neigh->ha, ETH_ALEN); + memcpy(params->smac, dev->dev_addr, ETH_ALEN); - return bpf_fib_set_fwd_params(params, neigh, dev, mtu); +set_fwd_params: + return bpf_fib_set_fwd_params(params, mtu); } #endif +#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ + BPF_FIB_LOOKUP_SKIP_NEIGH) + BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) { if (plen < sizeof(*params)) return -EINVAL; - if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT)) + if (flags & ~BPF_FIB_LOOKUP_MASK) return -EINVAL; switch (params->family) { @@ -5967,7 +5984,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, if (plen < sizeof(*params)) return -EINVAL; - if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT)) + if (flags & ~BPF_FIB_LOOKUP_MASK) return -EINVAL; if (params->tot_len) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6c0f2149f2c72..b20c9768d9f3f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -410,7 +410,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, */ __skb_queue_purge(&n->arp_queue); n->arp_queue_len_bytes = 0; - n->output = neigh_blackhole; + WRITE_ONCE(n->output, neigh_blackhole); if (n->nud_state & NUD_VALID) n->nud_state = NUD_NOARP; else @@ -614,7 +614,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, NEIGH_CACHE_STAT_INC(tbl, lookups); - rcu_read_lock_bh(); + rcu_read_lock(); n = __neigh_lookup_noref(tbl, pkey, dev); if (n) { if (!refcount_inc_not_zero(&n->refcnt)) @@ -622,7 +622,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, NEIGH_CACHE_STAT_INC(tbl, hits); } - rcu_read_unlock_bh(); + rcu_read_unlock(); return n; } EXPORT_SYMBOL(neigh_lookup); @@ -920,7 +920,7 @@ static void neigh_suspect(struct neighbour *neigh) { neigh_dbg(2, "neigh %p is suspected\n", neigh); - neigh->output = neigh->ops->output; + WRITE_ONCE(neigh->output, neigh->ops->output); } /* Neighbour state is OK; @@ -932,7 +932,7 @@ static void neigh_connect(struct neighbour *neigh) { neigh_dbg(2, "neigh %p is connected\n", neigh); - neigh->output = neigh->ops->connected_output; + WRITE_ONCE(neigh->output, neigh->ops->connected_output); } static void neigh_periodic_work(struct work_struct *work) @@ -988,7 +988,9 @@ static void neigh_periodic_work(struct work_struct *work) (state == NUD_FAILED || !time_in_range_open(jiffies, n->used, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { - *np = n->next; + rcu_assign_pointer(*np, + rcu_dereference_protected(n->next, + lockdep_is_held(&tbl->lock))); neigh_mark_dead(n); write_unlock(&n->lock); neigh_cleanup_and_release(n); @@ -1093,13 +1095,13 @@ static void neigh_timer_handler(struct timer_list *t) neigh->used + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { neigh_dbg(2, "neigh %p is delayed\n", neigh); - neigh->nud_state = NUD_DELAY; + WRITE_ONCE(neigh->nud_state, NUD_DELAY); neigh->updated = jiffies; neigh_suspect(neigh); next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME); } else { neigh_dbg(2, "neigh %p is suspected\n", neigh); - neigh->nud_state = NUD_STALE; + WRITE_ONCE(neigh->nud_state, NUD_STALE); neigh->updated = jiffies; neigh_suspect(neigh); notify = 1; @@ -1109,14 +1111,14 @@ static void neigh_timer_handler(struct timer_list *t) 
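All the nud_state conversions in net/core/neighbour.c follow the same discipline: writes still happen only under neigh->lock, but each is now a WRITE_ONCE() paired with the READ_ONCE() used by lockless RCU-side readers, so every access is a single, tear-free load or store. In miniature:

	/* writer, still serialized by write_lock(&neigh->lock): */
	WRITE_ONCE(neigh->nud_state, NUD_STALE);

	/* lockless reader, under rcu_read_lock() only: */
	if (READ_ONCE(neigh->nud_state) & NUD_VALID)
		/* one atomic load; the value may be stale but never torn */;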
neigh->confirmed + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { neigh_dbg(2, "neigh %p is now reachable\n", neigh); - neigh->nud_state = NUD_REACHABLE; + WRITE_ONCE(neigh->nud_state, NUD_REACHABLE); neigh->updated = jiffies; neigh_connect(neigh); notify = 1; next = neigh->confirmed + neigh->parms->reachable_time; } else { neigh_dbg(2, "neigh %p is probed\n", neigh); - neigh->nud_state = NUD_PROBE; + WRITE_ONCE(neigh->nud_state, NUD_PROBE); neigh->updated = jiffies; atomic_set(&neigh->probes, 0); notify = 1; @@ -1130,7 +1132,7 @@ static void neigh_timer_handler(struct timer_list *t) if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { - neigh->nud_state = NUD_FAILED; + WRITE_ONCE(neigh->nud_state, NUD_FAILED); notify = 1; neigh_invalidate(neigh); goto out; @@ -1179,7 +1181,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); neigh_del_timer(neigh); - neigh->nud_state = NUD_INCOMPLETE; + WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE); neigh->updated = now; if (!immediate_ok) { next = now + 1; @@ -1191,7 +1193,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, } neigh_add_timer(neigh, next); } else { - neigh->nud_state = NUD_FAILED; + WRITE_ONCE(neigh->nud_state, NUD_FAILED); neigh->updated = jiffies; write_unlock_bh(&neigh->lock); @@ -1201,7 +1203,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, } else if (neigh->nud_state & NUD_STALE) { neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh_del_timer(neigh); - neigh->nud_state = NUD_DELAY; + WRITE_ONCE(neigh->nud_state, NUD_DELAY); neigh->updated = jiffies; neigh_add_timer(neigh, jiffies + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME)); @@ -1313,7 +1315,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, neigh_update_flags(neigh, flags, ¬ify, &gc_update, &managed_update); if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) { new = old & ~NUD_PERMANENT; - neigh->nud_state = new; + WRITE_ONCE(neigh->nud_state, new); err = 0; goto out; } @@ -1322,7 +1324,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, neigh_del_timer(neigh); if (old & NUD_CONNECTED) neigh_suspect(neigh); - neigh->nud_state = new; + WRITE_ONCE(neigh->nud_state, new); err = 0; notify = old & NUD_VALID; if ((old & (NUD_INCOMPLETE | NUD_PROBE)) && @@ -1401,7 +1403,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, ((new & NUD_REACHABLE) ? 
neigh->parms->reachable_time : 0))); - neigh->nud_state = new; + WRITE_ONCE(neigh->nud_state, new); notify = 1; } @@ -1447,7 +1449,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, if (n2) n1 = n2; } - n1->output(n1, skb); + READ_ONCE(n1->output)(n1, skb); if (n2) neigh_release(n2); rcu_read_unlock(); @@ -1488,7 +1490,7 @@ void __neigh_set_probe_once(struct neighbour *neigh) neigh->updated = jiffies; if (!(neigh->nud_state & NUD_FAILED)) return; - neigh->nud_state = NUD_INCOMPLETE; + WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE); atomic_set(&neigh->probes, neigh_max_probes(neigh)); neigh_add_timer(neigh, jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), @@ -2174,11 +2176,11 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, .ndtc_proxy_qlen = tbl->proxy_queue.qlen, }; - rcu_read_lock_bh(); - nht = rcu_dereference_bh(tbl->nht); + rcu_read_lock(); + nht = rcu_dereference(tbl->nht); ndc.ndtc_hash_rnd = nht->hash_rnd[0]; ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1); - rcu_read_unlock_bh(); + rcu_read_unlock(); if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc)) goto nla_put_failure; @@ -2693,15 +2695,15 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (filter->dev_idx || filter->master_idx) flags |= NLM_F_DUMP_FILTERED; - rcu_read_lock_bh(); - nht = rcu_dereference_bh(tbl->nht); + rcu_read_lock(); + nht = rcu_dereference(tbl->nht); for (h = s_h; h < (1 << nht->hash_shift); h++) { if (h > s_h) s_idx = 0; - for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0; + for (n = rcu_dereference(nht->hash_buckets[h]), idx = 0; n != NULL; - n = rcu_dereference_bh(n->next)) { + n = rcu_dereference(n->next)) { if (idx < s_idx || !net_eq(dev_net(n->dev), net)) goto next; if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || @@ -2720,7 +2722,7 @@ next: } rc = skb->len; out: - rcu_read_unlock_bh(); + rcu_read_unlock(); cb->args[1] = h; cb->args[2] = idx; return rc; @@ -3065,20 +3067,20 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void int chain; struct neigh_hash_table *nht; - rcu_read_lock_bh(); - nht = rcu_dereference_bh(tbl->nht); + rcu_read_lock(); + nht = rcu_dereference(tbl->nht); - read_lock(&tbl->lock); /* avoid resizes */ + read_lock_bh(&tbl->lock); /* avoid resizes */ for (chain = 0; chain < (1 << nht->hash_shift); chain++) { struct neighbour *n; - for (n = rcu_dereference_bh(nht->hash_buckets[chain]); + for (n = rcu_dereference(nht->hash_buckets[chain]); n != NULL; - n = rcu_dereference_bh(n->next)) + n = rcu_dereference(n->next)) cb(n, cookie); } - read_unlock(&tbl->lock); - rcu_read_unlock_bh(); + read_unlock_bh(&tbl->lock); + rcu_read_unlock(); } EXPORT_SYMBOL(neigh_for_each); @@ -3128,7 +3130,7 @@ int neigh_xmit(int index, struct net_device *dev, tbl = neigh_tables[index]; if (!tbl) goto out; - rcu_read_lock_bh(); + rcu_read_lock(); if (index == NEIGH_ARP_TABLE) { u32 key = *((u32 *)addr); @@ -3140,11 +3142,11 @@ int neigh_xmit(int index, struct net_device *dev, neigh = __neigh_create(tbl, addr, dev, false); err = PTR_ERR(neigh); if (IS_ERR(neigh)) { - rcu_read_unlock_bh(); + rcu_read_unlock(); goto out_kfree_skb; } - err = neigh->output(neigh, skb); - rcu_read_unlock_bh(); + err = READ_ONCE(neigh->output)(neigh, skb); + rcu_read_unlock(); } else if (index == NEIGH_LINK_TABLE) { err = dev_hard_header(skb, dev, ntohs(skb->protocol), @@ -3173,7 +3175,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) state->flags &= ~NEIGH_SEQ_IS_PNEIGH; for (bucket = 0; 
bucket < (1 << nht->hash_shift); bucket++) { - n = rcu_dereference_bh(nht->hash_buckets[bucket]); + n = rcu_dereference(nht->hash_buckets[bucket]); while (n) { if (!net_eq(dev_net(n->dev), net)) @@ -3188,10 +3190,10 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) } if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) break; - if (n->nud_state & ~NUD_NOARP) + if (READ_ONCE(n->nud_state) & ~NUD_NOARP) break; next: - n = rcu_dereference_bh(n->next); + n = rcu_dereference(n->next); } if (n) @@ -3215,7 +3217,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, if (v) return n; } - n = rcu_dereference_bh(n->next); + n = rcu_dereference(n->next); while (1) { while (n) { @@ -3230,10 +3232,10 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) break; - if (n->nud_state & ~NUD_NOARP) + if (READ_ONCE(n->nud_state) & ~NUD_NOARP) break; next: - n = rcu_dereference_bh(n->next); + n = rcu_dereference(n->next); } if (n) @@ -3242,7 +3244,7 @@ next: if (++state->bucket >= (1 << nht->hash_shift)) break; - n = rcu_dereference_bh(nht->hash_buckets[state->bucket]); + n = rcu_dereference(nht->hash_buckets[state->bucket]); } if (n && pos) @@ -3344,7 +3346,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) __acquires(tbl->lock) - __acquires(rcu_bh) + __acquires(rcu) { struct neigh_seq_state *state = seq->private; @@ -3352,9 +3354,9 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl state->bucket = 0; state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); - rcu_read_lock_bh(); - state->nht = rcu_dereference_bh(tbl->nht); - read_lock(&tbl->lock); + rcu_read_lock(); + state->nht = rcu_dereference(tbl->nht); + read_lock_bh(&tbl->lock); return *pos ? 
neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; } @@ -3389,13 +3391,13 @@ EXPORT_SYMBOL(neigh_seq_next); void neigh_seq_stop(struct seq_file *seq, void *v) __releases(tbl->lock) - __releases(rcu_bh) + __releases(rcu) { struct neigh_seq_state *state = seq->private; struct neigh_table *tbl = state->tbl; - read_unlock(&tbl->lock); - rcu_read_unlock_bh(); + read_unlock_bh(&tbl->lock); + rcu_read_unlock(); } EXPORT_SYMBOL(neigh_seq_stop); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 96db7409baa12..38e01f82f2ef3 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -670,6 +670,8 @@ BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg *, msg, sk = __sock_map_lookup_elem(map, key); if (unlikely(!sk || !sock_map_redirect_allowed(sk))) return SK_DROP; + if (!(flags & BPF_F_INGRESS) && !sk_is_tcp(sk)) + return SK_DROP; msg->flags = flags; msg->sk_redir = sk; @@ -1262,6 +1264,8 @@ BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg *, msg, sk = __sock_hash_lookup_elem(map, key); if (unlikely(!sk || !sock_map_redirect_allowed(sk))) return SK_DROP; + if (!(flags & BPF_F_INGRESS) && !sk_is_tcp(sk)) + return SK_DROP; msg->flags = flags; msg->sk_redir = sk; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 4f7237661afb9..9456f5bb35e5d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -375,7 +375,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { - if (!(neigh->nud_state & NUD_VALID)) + if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) pr_debug("trying to ucast probe in NUD_INVALID\n"); neigh_ha_snapshot(dst_ha, neigh, dev); dst_hw = dst_ha; @@ -1123,7 +1123,7 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) neigh = neigh_lookup(&arp_tbl, &ip, dev); if (neigh) { - if (!(neigh->nud_state & NUD_NOARP)) { + if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) { read_lock_bh(&neigh->lock); memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); r->arp_flags = arp_state_to_flags(neigh); @@ -1144,12 +1144,12 @@ int arp_invalidate(struct net_device *dev, __be32 ip, bool force) struct neigh_table *tbl = &arp_tbl; if (neigh) { - if ((neigh->nud_state & NUD_VALID) && !force) { + if ((READ_ONCE(neigh->nud_state) & NUD_VALID) && !force) { neigh_release(neigh); return 0; } - if (neigh->nud_state & ~NUD_NOARP) + if (READ_ONCE(neigh->nud_state) & ~NUD_NOARP) err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_ADMIN, 0); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 3b6e6bc80dc1c..eafa4a0335157 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -564,7 +564,7 @@ static int fib_detect_death(struct fib_info *fi, int order, n = NULL; if (n) { - state = n->nud_state; + state = READ_ONCE(n->nud_state); neigh_release(n); } else { return 0; @@ -2194,7 +2194,7 @@ static bool fib_good_nh(const struct fib_nh *nh) if (nh->fib_nh_scope == RT_SCOPE_LINK) { struct neighbour *n; - rcu_read_lock_bh(); + rcu_read_lock(); if (likely(nh->fib_nh_gw_family == AF_INET)) n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, @@ -2205,9 +2205,9 @@ static bool fib_good_nh(const struct fib_nh *nh) else n = NULL; if (n) - state = n->nud_state; + state = READ_ONCE(n->nud_state); - rcu_read_unlock_bh(); + rcu_read_unlock(); } return !!(state & NUD_VALID); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 66908ce2dd116..493c679ea54f3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -218,7 +218,7 @@ static int ip_finish_output2(struct net *net, 
struct sock *sk, struct sk_buff *s return res; } - rcu_read_lock_bh(); + rcu_read_lock(); neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); if (!IS_ERR(neigh)) { int res; @@ -226,10 +226,10 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s sock_confirm_neigh(skb, neigh); /* if crossing protocols, can not use the cached header */ res = neigh_output(neigh, skb, is_v6gw); - rcu_read_unlock_bh(); + rcu_read_unlock(); return res; } - rcu_read_unlock_bh(); + rcu_read_unlock(); net_dbg_ratelimited("%s: No header cache and no neighbour!\n", __func__); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 9cc2879024541..be5498f5dd319 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1124,13 +1124,13 @@ static bool ipv6_good_nh(const struct fib6_nh *nh) int state = NUD_REACHABLE; struct neighbour *n; - rcu_read_lock_bh(); + rcu_read_lock(); n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6); if (n) - state = n->nud_state; + state = READ_ONCE(n->nud_state); - rcu_read_unlock_bh(); + rcu_read_unlock(); return !!(state & NUD_VALID); } @@ -1140,14 +1140,14 @@ static bool ipv4_good_nh(const struct fib_nh *nh) int state = NUD_REACHABLE; struct neighbour *n; - rcu_read_lock_bh(); + rcu_read_lock(); n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, (__force u32)nh->fib_nh_gw4); if (n) - state = n->nud_state; + state = READ_ONCE(n->nud_state); - rcu_read_unlock_bh(); + rcu_read_unlock(); return !!(state & NUD_VALID); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 84a0a71a6f4e7..9cbaae4f5ee71 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -408,7 +408,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, struct net_device *dev = dst->dev; struct neighbour *n; - rcu_read_lock_bh(); + rcu_read_lock(); if (likely(rt->rt_gw_family == AF_INET)) { n = ip_neigh_gw4(dev, rt->rt_gw4); @@ -424,7 +424,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, if (!IS_ERR(n) && !refcount_inc_not_zero(&n->refcnt)) n = NULL; - rcu_read_unlock_bh(); + rcu_read_unlock(); return n; } @@ -784,7 +784,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow if (!n) n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); if (!IS_ERR(n)) { - if (!(n->nud_state & NUD_VALID)) { + if (!(READ_ONCE(n->nud_state) & NUD_VALID)) { neigh_event_send(n, NULL); } else { if (fib_lookup(net, fl4, &res, 0) == 0) { @@ -3421,6 +3421,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fa->fa_type == fri.type) { fri.offload = READ_ONCE(fa->offload); fri.trap = READ_ONCE(fa->trap); + fri.offload_failed = + READ_ONCE(fa->offload_failed); break; } } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fab25d4f3a6f1..96fdde6e42b1b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1755,16 +1755,13 @@ EXPORT_SYMBOL(tcp_read_sock); int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { - struct tcp_sock *tp = tcp_sk(sk); - u32 seq = tp->copied_seq; struct sk_buff *skb; int copied = 0; - u32 offset; if (sk->sk_state == TCP_LISTEN) return -ENOTCONN; - while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) { + while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { u8 tcp_flags; int used; @@ -1777,13 +1774,10 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) copied = used; break; } - seq += used; copied += used; - if (tcp_flags & TCPHDR_FIN) { - ++seq; + if (tcp_flags & TCPHDR_FIN) break; - } } return copied; } diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c 
index 5f93918c063c7..f53380fd89bcf 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -217,6 +217,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk, int *addr_len) { struct tcp_sock *tcp = tcp_sk(sk); + int peek = flags & MSG_PEEK; u32 seq = tcp->copied_seq; struct sk_psock *psock; int copied = 0; @@ -306,7 +307,8 @@ msg_bytes_ready: copied = -EAGAIN; } out: - WRITE_ONCE(tcp->copied_seq, seq); + if (!peek) + WRITE_ONCE(tcp->copied_seq, seq); tcp_rcv_space_adjust(sk); if (copied > 0) __tcp_cleanup_rbuf(sk, copied); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c697836f2b5b4..068221e742425 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -243,6 +243,19 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) if (unlikely(len > icsk->icsk_ack.rcv_mss + MAX_TCP_OPTION_SPACE)) tcp_gro_dev_warn(sk, skb, len); + /* If the skb has a len of exactly 1*MSS and has the PSH bit + * set then it is likely the end of an application write. So + * more data may not be arriving soon, and yet the data sender + * may be waiting for an ACK if cwnd-bound or using TX zero + * copy. So we set ICSK_ACK_PUSHED here so that + * tcp_cleanup_rbuf() will send an ACK immediately if the app + * reads all of the data and is not ping-pong. If len > MSS + * then this logic does not matter (and does not hurt) because + * tcp_cleanup_rbuf() will always ACK immediately if the app + * reads data and there is more than an MSS of unACKed data. + */ + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_PSH) + icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; } else { /* Otherwise, we make more careful check taking into account, * that SACKs block is variable. diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index dc3166e56169f..5921b0f6f9f41 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -177,8 +177,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp, } /* Account for an ACK we sent. 
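The copied_seq guard above is the whole MSG_PEEK contract in one line: a peeked receive must not consume data. What the fix preserves, seen from userspace (illustrative fragment; assumes a connected TCP socket fd with pending data, error handling omitted):

	char a[16], b[16];
	ssize_t n;

	n = recv(fd, a, sizeof(a), MSG_PEEK);	/* look, but do not consume */
	if (n > 0) {
		recv(fd, b, n, MSG_WAITALL);	/* now actually consume */
		assert(memcmp(a, b, n) == 0);	/* both reads saw the same bytes */
	}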
*/ -static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts, - u32 rcv_nxt) +static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt) { struct tcp_sock *tp = tcp_sk(sk); @@ -192,7 +191,7 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts, if (unlikely(rcv_nxt != tp->rcv_nxt)) return; /* Special ACK sent by DCTCP to reflect ECN */ - tcp_dec_quickack_mode(sk, pkts); + tcp_dec_quickack_mode(sk); inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } @@ -1373,7 +1372,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, sk, skb); if (likely(tcb->tcp_flags & TCPHDR_ACK)) - tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt); + tcp_event_ack_sent(sk, rcv_nxt); if (skb->len != tcp_header_size) { tcp_event_data_sent(tp, sk); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 83be842198244..c63ccd39fc552 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -202,6 +202,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .ra_defrtr_metric = IP6_RT_PRIO_USER, .accept_ra_from_local = 0, .accept_ra_min_hop_limit= 1, + .accept_ra_min_lft = 0, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -262,6 +263,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .ra_defrtr_metric = IP6_RT_PRIO_USER, .accept_ra_from_local = 0, .accept_ra_min_hop_limit= 1, + .accept_ra_min_lft = 0, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -1033,7 +1035,7 @@ static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa) unsigned int hash = inet6_addr_hash(net, &ifa->addr); int err = 0; - spin_lock(&net->ipv6.addrconf_hash_lock); + spin_lock_bh(&net->ipv6.addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ if (ipv6_chk_same_addr(net, &ifa->addr, dev, hash)) { @@ -1043,7 +1045,7 @@ static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa) hlist_add_head_rcu(&ifa->addr_lst, &net->ipv6.inet6_addr_lst[hash]); } - spin_unlock(&net->ipv6.addrconf_hash_lock); + spin_unlock_bh(&net->ipv6.addrconf_hash_lock); return err; } @@ -1138,15 +1140,15 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, /* For caller */ refcount_set(&ifa->refcnt, 1); - rcu_read_lock_bh(); + rcu_read_lock(); err = ipv6_add_addr_hash(idev->dev, ifa); if (err < 0) { - rcu_read_unlock_bh(); + rcu_read_unlock(); goto out; } - write_lock(&idev->lock); + write_lock_bh(&idev->lock); /* Add to inet6_dev unicast addr list. 
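The lock-suffix changes in the addrconf hunks above are all one consequence of moving off rcu_read_lock_bh(): a plain rcu_read_lock() leaves bottom halves enabled, so every lock in the section that can also be taken from softirq context must now mask BHs itself. Side by side:

	/* before: BHs were implicitly off for the whole section */
	rcu_read_lock_bh();
	spin_lock(&net->ipv6.addrconf_hash_lock);

	/* after: plain RCU; the lock masks BHs itself, otherwise a
	 * softirq on this CPU could deadlock against the holder */
	rcu_read_lock();
	spin_lock_bh(&net->ipv6.addrconf_hash_lock);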
*/ ipv6_link_dev_addr(idev, ifa); @@ -1157,9 +1159,9 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, } in6_ifa_hold(ifa); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); - rcu_read_unlock_bh(); + rcu_read_unlock(); inet6addr_notifier_call_chain(NETDEV_UP, ifa); out: @@ -2731,6 +2733,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) return; } + if (valid_lft != 0 && valid_lft < in6_dev->cnf.accept_ra_min_lft) + goto put; + /* * Two things going on here: * 1) Add routes for on-link prefixes @@ -5601,6 +5606,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide; array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier; array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na; + array[DEVCONF_ACCEPT_RA_MIN_LFT] = cnf->accept_ra_min_lft; } static inline size_t inet6_ifla6_size(void) @@ -6794,6 +6800,13 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "accept_ra_min_lft", + .data = &ipv6_devconf.accept_ra_min_lft, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "accept_ra_pinfo", .data = &ipv6_devconf.accept_ra_pinfo, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 413f66781e50d..eb6640f9a7921 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2492,7 +2492,7 @@ static int ipv6_route_native_seq_show(struct seq_file *seq, void *v) const struct net_device *dev; if (rt->nh) - fib6_nh = nexthop_fib6_nh_bh(rt->nh); + fib6_nh = nexthop_fib6_nh(rt->nh); seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen); @@ -2557,14 +2557,14 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, if (tbl) { h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1; - node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist)); + node = rcu_dereference(hlist_next_rcu(&tbl->tb6_hlist)); } else { h = 0; node = NULL; } while (!node && h < FIB6_TABLE_HASHSZ) { - node = rcu_dereference_bh( + node = rcu_dereference( hlist_first_rcu(&net->ipv6.fib_table_hash[h++])); } return hlist_entry_safe(node, struct fib6_table, tb6_hlist); @@ -2594,7 +2594,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (!v) goto iter_table; - n = rcu_dereference_bh(((struct fib6_info *)v)->fib6_next); + n = rcu_dereference(((struct fib6_info *)v)->fib6_next); if (n) return n; @@ -2620,12 +2620,12 @@ iter_table: } static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU_BH) + __acquires(RCU) { struct net *net = seq_file_net(seq); struct ipv6_route_iter *iter = seq->private; - rcu_read_lock_bh(); + rcu_read_lock(); iter->tbl = ipv6_route_seq_next_table(NULL, net); iter->skip = *pos; @@ -2646,7 +2646,7 @@ static bool ipv6_route_iter_active(struct ipv6_route_iter *iter) } static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v) - __releases(RCU_BH) + __releases(RCU) { struct net *net = seq_file_net(seq); struct ipv6_route_iter *iter = seq->private; @@ -2654,7 +2654,7 @@ static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v) if (ipv6_route_iter_active(iter)) fib6_walker_unlink(net, &iter->w); - rcu_read_unlock_bh(); + rcu_read_unlock(); } #if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 34192f7a166fb..ce2c5e728745f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -116,7 
+116,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * return res; } - rcu_read_lock_bh(); + rcu_read_lock(); nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); @@ -124,7 +124,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dev, false); if (IS_ERR(neigh)) { - rcu_read_unlock_bh(); + rcu_read_unlock(); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); return -EINVAL; @@ -132,7 +132,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } sock_confirm_neigh(skb, neigh); ret = neigh_output(neigh, skb, false); - rcu_read_unlock_bh(); + rcu_read_unlock(); return ret; } @@ -1150,11 +1150,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, * dst entry of the nexthop router */ rt = (struct rt6_info *) *dst; - rcu_read_lock_bh(); + rcu_read_lock(); n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); - err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; - rcu_read_unlock_bh(); + err = n && !(READ_ONCE(n->nud_state) & NUD_VALID) ? -EINVAL : 0; + rcu_read_unlock(); if (err) { struct inet6_ifaddr *ifp; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index a4d43eb45a9de..8c5a99fe68030 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -746,7 +746,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) saddr = &ipv6_hdr(skb)->saddr; probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { - if (!(neigh->nud_state & NUD_VALID)) { + if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) { ND_PRINTK(1, dbg, "%s: trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); @@ -1092,7 +1092,7 @@ static void ndisc_recv_na(struct sk_buff *skb) u8 old_flags = neigh->flags; struct net *net = dev_net(dev); - if (neigh->nud_state & NUD_FAILED) + if (READ_ONCE(neigh->nud_state) & NUD_FAILED) goto out; /* @@ -1331,6 +1331,14 @@ static void ndisc_router_discovery(struct sk_buff *skb) goto skip_defrtr; } + lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); + if (lifetime != 0 && lifetime < in6_dev->cnf.accept_ra_min_lft) { + ND_PRINTK(2, info, + "RA: router lifetime (%ds) is too short: %s\n", + lifetime, skb->dev->name); + goto skip_defrtr; + } + /* Do not accept RA with source-addr found on local machine unless * accept_ra_from_local is set to true. 
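The router-lifetime check added above (and the route-information variant a little further down) encodes one rule worth stating as a predicate: zero keeps its "withdraw this router/route" meaning and is always processed, while nonzero lifetimes below the threshold are ignored. As a sketch (ra_lft_acceptable is a hypothetical helper, not part of the patch):

	static bool ra_lft_acceptable(u32 lifetime, u32 min_lft)
	{
		/* 0 = withdrawal, always accepted for processing */
		return lifetime == 0 || lifetime >= min_lft;
	}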
*/ @@ -1343,8 +1351,6 @@ static void ndisc_router_discovery(struct sk_buff *skb) goto skip_defrtr; } - lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); - #ifdef CONFIG_IPV6_ROUTER_PREF pref = ra_msg->icmph.icmp6_router_pref; /* 10b is handled as if it were 00b (medium) */ @@ -1519,6 +1525,9 @@ skip_linkparms: if (ri->prefix_len == 0 && !in6_dev->cnf.accept_ra_defrtr) continue; + if (ri->lifetime != 0 && + ntohl(ri->lifetime) < in6_dev->cnf.accept_ra_min_lft) + continue; if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen) continue; if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 93957b20fccce..0bcdb675ba2c1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -636,15 +636,15 @@ static void rt6_probe(struct fib6_nh *fib6_nh) nh_gw = &fib6_nh->fib_nh_gw6; dev = fib6_nh->fib_nh_dev; - rcu_read_lock_bh(); + rcu_read_lock(); last_probe = READ_ONCE(fib6_nh->last_probe); idev = __in6_dev_get(dev); neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { - if (neigh->nud_state & NUD_VALID) + if (READ_ONCE(neigh->nud_state) & NUD_VALID) goto out; - write_lock(&neigh->lock); + write_lock_bh(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, neigh->updated + idev->cnf.rtr_probe_interval)) { @@ -652,7 +652,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh) if (work) __neigh_set_probe_once(neigh); } - write_unlock(&neigh->lock); + write_unlock_bh(&neigh->lock); } else if (time_after(jiffies, last_probe + idev->cnf.rtr_probe_interval)) { work = kmalloc(sizeof(*work), GFP_ATOMIC); @@ -670,7 +670,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh) } out: - rcu_read_unlock_bh(); + rcu_read_unlock(); } #else static inline void rt6_probe(struct fib6_nh *fib6_nh) @@ -686,25 +686,25 @@ static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh) enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; struct neighbour *neigh; - rcu_read_lock_bh(); + rcu_read_lock(); neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev, &fib6_nh->fib_nh_gw6); if (neigh) { - read_lock(&neigh->lock); - if (neigh->nud_state & NUD_VALID) + u8 nud_state = READ_ONCE(neigh->nud_state); + + if (nud_state & NUD_VALID) ret = RT6_NUD_SUCCEED; #ifdef CONFIG_IPV6_ROUTER_PREF - else if (!(neigh->nud_state & NUD_FAILED)) + else if (!(nud_state & NUD_FAILED)) ret = RT6_NUD_SUCCEED; else ret = RT6_NUD_FAIL_PROBE; #endif - read_unlock(&neigh->lock); } else { ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? 
RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
 	}
-	rcu_read_unlock_bh();
+	rcu_read_unlock();
 
 	return ret;
 }
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4bdd356bb5c46..7be89dcfd5fc5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1644,9 +1644,12 @@ process:
 		struct sock *nsk;
 
 		sk = req->rsk_listener;
-		drop_reason = tcp_inbound_md5_hash(sk, skb,
-						   &hdr->saddr, &hdr->daddr,
-						   AF_INET6, dif, sdif);
+		if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
+		else
+			drop_reason = tcp_inbound_md5_hash(sk, skb,
+							   &hdr->saddr, &hdr->daddr,
+							   AF_INET6, dif, sdif);
 		if (drop_reason) {
 			sk_drops_add(sk, skb);
 			reqsk_put(req);
@@ -1693,6 +1696,7 @@ process:
 			}
 			goto discard_and_relse;
 		}
+		nf_reset_ct(skb);
 		if (nsk == sk) {
 			reqsk_put(req);
 			tcp_v6_restore_cb(skb);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index bce4132b0a5c8..314ec3a51e8de 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -510,7 +510,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	 */
 	if (len > INT_MAX - transhdrlen)
 		return -EMSGSIZE;
-	ulen = len + transhdrlen;
 
 	/* Mirror BSD error message compatibility */
 	if (msg->msg_flags & MSG_OOB)
@@ -631,6 +630,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 back_from_confirm:
 	lock_sock(sk);
+	ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0);
 	err = ip6_append_data(sk, ip_generic_getfrag, msg,
 			      ulen, transhdrlen, &ipc6,
 			      &fl6, (struct rt6_info *)dst,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index cf3453b532d67..0167413d56972 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -566,6 +566,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 	}
 
 	err = ieee80211_key_link(key, link, sta);
+	/* KRACK protection, shouldn't happen but just silently accept key */
+	if (err == -EALREADY)
+		err = 0;
 
  out_unlock:
 	mutex_unlock(&local->sta_mtx);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index e8f6c1e5eabfc..23bb24243c6e9 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -901,7 +901,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
 		 */
 		if (ieee80211_key_identical(sdata, old_key, key)) {
 			ieee80211_key_free_unused(key);
-			ret = 0;
+			ret = -EALREADY;
 			goto out;
 		}
 
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 9127a7fd5269c..5d845fcf3d09e 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -2047,7 +2047,7 @@ static int mptcp_event_put_token_and_ssk(struct sk_buff *skb,
 	    nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if))
 		return -EMSGSIZE;
 
-	sk_err = ssk->sk_err;
+	sk_err = READ_ONCE(ssk->sk_err);
 	if (sk_err && sk->sk_state == TCP_ESTABLISHED &&
 	    nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err))
 		return -EMSGSIZE;
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 8a2aa63caa51f..38cbdc66d8bff 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -309,12 +309,6 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
 		goto create_err;
 	}
 
-	if (addr_l.id == 0) {
-		NL_SET_ERR_MSG_ATTR(info->extack, laddr, "missing local addr id");
-		err = -EINVAL;
-		goto create_err;
-	}
-
 	err = mptcp_pm_parse_addr(raddr, info, &addr_r);
 	if (err < 0) {
 		NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr");
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 6dd880d6b0518..b6e0579e72644 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -401,7 +401,7 @@ drop:
 	return false;
 }
 
-static void 
mptcp_stop_timer(struct sock *sk) +static void mptcp_stop_rtx_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -765,6 +765,46 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) return moved; } +static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk) +{ + int err = sock_error(ssk); + int ssk_state; + + if (!err) + return false; + + /* only propagate errors on fallen-back sockets or + * on MPC connect + */ + if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(mptcp_sk(sk))) + return false; + + /* We need to propagate only transition to CLOSE state. + * Orphaned socket will see such state change via + * subflow_sched_work_if_closed() and that path will properly + * destroy the msk as needed. + */ + ssk_state = inet_sk_state_load(ssk); + if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) + inet_sk_state_store(sk, ssk_state); + WRITE_ONCE(sk->sk_err, -err); + + /* This barrier is coupled with smp_rmb() in mptcp_poll() */ + smp_wmb(); + sk_error_report(sk); + return true; +} + +void __mptcp_error_report(struct sock *sk) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); + + mptcp_for_each_subflow(msk, subflow) + if (__mptcp_subflow_error_report(sk, mptcp_subflow_tcp_sock(subflow))) + break; +} + /* In most cases we will be able to lock the mptcp socket. If its already * owned, we need to defer to the work queue to avoid ABBA deadlock. */ @@ -846,6 +886,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) mptcp_sockopt_sync_locked(msk, ssk); mptcp_subflow_joined(msk, ssk); + mptcp_stop_tout_timer(sk); return true; } @@ -865,12 +906,12 @@ static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list } } -static bool mptcp_timer_pending(struct sock *sk) +static bool mptcp_rtx_timer_pending(struct sock *sk) { return timer_pending(&inet_csk(sk)->icsk_retransmit_timer); } -static void mptcp_reset_timer(struct sock *sk) +static void mptcp_reset_rtx_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); unsigned long tout; @@ -1054,10 +1095,10 @@ static void __mptcp_clean_una(struct sock *sk) out: if (snd_una == READ_ONCE(msk->snd_nxt) && snd_una == READ_ONCE(msk->write_seq)) { - if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) - mptcp_stop_timer(sk); + if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) + mptcp_stop_rtx_timer(sk); } else { - mptcp_reset_timer(sk); + mptcp_reset_rtx_timer(sk); } } @@ -1606,8 +1647,8 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) out: /* ensure the rtx timer is running */ - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (!mptcp_rtx_timer_pending(sk)) + mptcp_reset_rtx_timer(sk); if (do_check_data_fin) mptcp_check_send_data_fin(sk); } @@ -1665,8 +1706,8 @@ out: if (copied) { tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (!mptcp_rtx_timer_pending(sk)) + mptcp_reset_rtx_timer(sk); if (msk->snd_data_fin_enable && msk->snd_nxt + 1 == msk->write_seq) @@ -2227,7 +2268,7 @@ static void mptcp_retransmit_timer(struct timer_list *t) sock_put(sk); } -static void mptcp_timeout_timer(struct timer_list *t) +static void mptcp_tout_timer(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); @@ -2349,18 +2390,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, bool dispose_it, need_push = false; /* If the first subflow moved to a close state before 
accept, e.g. due - * to an incoming reset, mptcp either: - * - if either the subflow or the msk are dead, destroy the context - * (the subflow socket is deleted by inet_child_forget) and the msk - * - otherwise do nothing at the moment and take action at accept and/or - * listener shutdown - user-space must be able to accept() the closed - * socket. + * to an incoming reset or listener shutdown, the subflow socket is + * already deleted by inet_child_forget() and the mptcp socket can't + * survive too. */ - if (msk->in_accept_queue && msk->first == ssk) { - if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD)) - return; - + if (msk->in_accept_queue && msk->first == ssk && + (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) { /* ensure later check in mptcp_worker() will dispose the msk */ + mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1)); sock_set_flag(sk, SOCK_DEAD); lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); mptcp_subflow_drop_ctx(ssk); @@ -2413,6 +2450,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, } out_release: + __mptcp_subflow_error_report(sk, ssk); release_sock(ssk); sock_put(ssk); @@ -2426,6 +2464,22 @@ out: if (need_push) __mptcp_push_pending(sk, 0); + + /* Catch every 'all subflows closed' scenario, including peers silently + * closing them, e.g. due to timeout. + * For established sockets, allow an additional timeout before closing, + * as the protocol can still create more subflows. + */ + if (list_is_singular(&msk->conn_list) && msk->first && + inet_sk_state_load(msk->first) == TCP_CLOSE) { + if (sk->sk_state != TCP_ESTABLISHED || + msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) { + inet_sk_state_store(sk, TCP_CLOSE); + mptcp_close_wake_up(sk); + } else { + mptcp_start_tout_timer(sk); + } + } } void mptcp_close_ssk(struct sock *sk, struct sock *ssk, @@ -2469,23 +2523,14 @@ static void __mptcp_close_subflow(struct sock *sk) } -static bool mptcp_should_close(const struct sock *sk) +static bool mptcp_close_tout_expired(const struct sock *sk) { - s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp; - struct mptcp_subflow_context *subflow; - - if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue) - return true; + if (!inet_csk(sk)->icsk_mtup.probe_timestamp || + sk->sk_state == TCP_CLOSE) + return false; - /* if all subflows are in closed status don't bother with additional - * timeout - */ - mptcp_for_each_subflow(mptcp_sk(sk), subflow) { - if (inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) != - TCP_CLOSE) - return false; - } - return true; + return time_after32(tcp_jiffies32, + inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN); } static void mptcp_check_fastclose(struct mptcp_sock *msk) @@ -2513,15 +2558,15 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) /* Mirror the tcp_reset() error propagation */ switch (sk->sk_state) { case TCP_SYN_SENT: - sk->sk_err = ECONNREFUSED; + WRITE_ONCE(sk->sk_err, ECONNREFUSED); break; case TCP_CLOSE_WAIT: - sk->sk_err = EPIPE; + WRITE_ONCE(sk->sk_err, EPIPE); break; case TCP_CLOSE: return; default: - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); } inet_sk_state_store(sk, TCP_CLOSE); @@ -2597,27 +2642,28 @@ static void __mptcp_retrans(struct sock *sk) reset_timer: mptcp_check_and_set_pending(sk); - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (!mptcp_rtx_timer_pending(sk)) + mptcp_reset_rtx_timer(sk); } /* schedule the timeout timer for the relevant event: either close timeout * or mp_fail timeout. 
The close timeout takes precedence on the mp_fail one */ -void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout) +void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout) { struct sock *sk = (struct sock *)msk; unsigned long timeout, close_timeout; - if (!fail_tout && !sock_flag(sk, SOCK_DEAD)) + if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp) return; - close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN; + close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + + TCP_TIMEWAIT_LEN; /* the close timeout takes precedence on the fail one, and here at least one of * them is active */ - timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout; + timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout; sk_reset_timer(sk, &sk->sk_timer, timeout); } @@ -2636,8 +2682,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) mptcp_subflow_reset(ssk); WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0); unlock_sock_fast(ssk, slow); - - mptcp_reset_timeout(msk, 0); } static void mptcp_do_fastclose(struct sock *sk) @@ -2676,19 +2720,15 @@ static void mptcp_worker(struct work_struct *work) if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) __mptcp_close_subflow(sk); - /* There is no point in keeping around an orphaned sk timedout or - * closed, but we need the msk around to reply to incoming DATA_FIN, - * even if it is orphaned and in FIN_WAIT2 state - */ - if (sock_flag(sk, SOCK_DEAD)) { - if (mptcp_should_close(sk)) { - inet_sk_state_store(sk, TCP_CLOSE); - mptcp_do_fastclose(sk); - } - if (sk->sk_state == TCP_CLOSE) { - __mptcp_destroy_sock(sk); - goto unlock; - } + if (mptcp_close_tout_expired(sk)) { + inet_sk_state_store(sk, TCP_CLOSE); + mptcp_do_fastclose(sk); + mptcp_close_wake_up(sk); + } + + if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) { + __mptcp_destroy_sock(sk); + goto unlock; } if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) @@ -2728,7 +2768,7 @@ static int __mptcp_init_sock(struct sock *sk) /* re-use the csk retrans timer for MPTCP-level retrans */ timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); - timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0); + timer_setup(&sk->sk_timer, mptcp_tout_timer, 0); return 0; } @@ -2820,8 +2860,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) } else { pr_debug("Sending DATA_FIN on subflow %p", ssk); tcp_send_ack(ssk); - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (!mptcp_rtx_timer_pending(sk)) + mptcp_reset_rtx_timer(sk); } break; } @@ -2904,7 +2944,7 @@ static void __mptcp_destroy_sock(struct sock *sk) might_sleep(); - mptcp_stop_timer(sk); + mptcp_stop_rtx_timer(sk); sk_stop_timer(sk, &sk->sk_timer); msk->pm.status = 0; @@ -2984,7 +3024,6 @@ bool __mptcp_close(struct sock *sk, long timeout) cleanup: /* orphan all the subflows */ - inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast_nested(ssk); @@ -3021,7 +3060,7 @@ cleanup: __mptcp_destroy_sock(sk); do_cancel_work = true; } else { - mptcp_reset_timeout(msk, 0); + mptcp_start_tout_timer(sk); } return do_cancel_work; @@ -3084,8 +3123,8 @@ static int mptcp_disconnect(struct sock *sk, int flags) mptcp_check_listen_stop(sk); inet_sk_state_store(sk, TCP_CLOSE); - mptcp_stop_timer(sk); - sk_stop_timer(sk, &sk->sk_timer); + 
mptcp_stop_rtx_timer(sk); + mptcp_stop_tout_timer(sk); if (mptcp_sk(sk)->token) mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL); @@ -3895,7 +3934,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */ smp_rmb(); - if (sk->sk_err) + if (READ_ONCE(sk->sk_err)) mask |= EPOLLERR; return mask; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d77b25636125b..91d89a0aeb586 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -681,7 +681,29 @@ void mptcp_get_options(const struct sk_buff *skb, void mptcp_finish_connect(struct sock *sk); void __mptcp_set_connected(struct sock *sk); -void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout); +void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout); + +static inline void mptcp_stop_tout_timer(struct sock *sk) +{ + if (!inet_csk(sk)->icsk_mtup.probe_timestamp) + return; + + sk_stop_timer(sk, &sk->sk_timer); + inet_csk(sk)->icsk_mtup.probe_timestamp = 0; +} + +static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout) +{ + /* avoid 0 timestamp, as that means no close timeout */ + inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? : 1; +} + +static inline void mptcp_start_tout_timer(struct sock *sk) +{ + mptcp_set_close_tout(sk, tcp_jiffies32); + mptcp_reset_tout_timer(mptcp_sk(sk), 0); +} + static inline bool mptcp_is_fully_established(struct sock *sk) { return inet_sk_state_load(sk) == TCP_ESTABLISHED && diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 52a747a80e88e..b93b08a75017b 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1161,7 +1161,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) WRITE_ONCE(subflow->fail_tout, fail_tout); tcp_send_ack(ssk); - mptcp_reset_timeout(msk, subflow->fail_tout); + mptcp_reset_tout_timer(msk, subflow->fail_tout); } static bool subflow_check_data_avail(struct sock *ssk) @@ -1248,7 +1248,7 @@ fallback: subflow->reset_reason = MPTCP_RST_EMPTCP; reset: - ssk->sk_err = EBADMSG; + WRITE_ONCE(ssk->sk_err, EBADMSG); tcp_set_state(ssk, TCP_CLOSE); while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); @@ -1305,42 +1305,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space) *full_space = tcp_full_space(sk); } -void __mptcp_error_report(struct sock *sk) -{ - struct mptcp_subflow_context *subflow; - struct mptcp_sock *msk = mptcp_sk(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - int err = sock_error(ssk); - int ssk_state; - - if (!err) - continue; - - /* only propagate errors on fallen-back sockets or - * on MPC connect - */ - if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) - continue; - - /* We need to propagate only transition to CLOSE state. - * Orphaned socket will see such state change via - * subflow_sched_work_if_closed() and that path will properly - * destroy the msk as needed. 
- */ - ssk_state = inet_sk_state_load(ssk); - if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) - inet_sk_state_store(sk, ssk_state); - sk->sk_err = -err; - - /* This barrier is coupled with smp_rmb() in mptcp_poll() */ - smp_wmb(); - sk_error_report(sk); - break; - } -} - static void subflow_error_report(struct sock *ssk) { struct sock *sk = mptcp_subflow_ctx(ssk)->conn; @@ -1527,6 +1491,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, mptcp_sock_graft(ssk, sk->sk_socket); iput(SOCK_INODE(sf)); WRITE_ONCE(msk->allow_infinite_fallback, false); + mptcp_stop_tout_timer(sk); return 0; failed_unlink: diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index d4fe7bb4f853a..6574f4e651b1a 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1507,8 +1507,8 @@ static int make_send_sock(struct netns_ipvs *ipvs, int id, } get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id); - result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr, - salen, 0); + result = kernel_connect(sock, (struct sockaddr *)&mcast_addr, + salen, 0); if (result < 0) { pr_err("Error connecting to the multicast addr\n"); goto error; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 7247af51bdfc4..c94a9971d790c 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -112,7 +112,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { /* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA}, /* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/ /* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */ -/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */ +/* cookie_ack */ {sCL, sCL, sCW, sES, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */ /* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL}, /* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, /* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, @@ -126,7 +126,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { /* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV}, /* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV}, /* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV}, -/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */ +/* cookie_echo */ {sIV, sCL, sCE, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */ /* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV}, /* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV}, /* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, @@ -426,6 +426,9 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, /* (D) vtag must be same as init_vtag as found in INIT_ACK */ if (sh->vtag != ct->proto.sctp.vtag[dir]) goto out_unlock; + } else if (sch->type == SCTP_CID_COOKIE_ACK) { + ct->proto.sctp.init[dir] = 0; + ct->proto.sctp.init[!dir] = 0; } else if (sch->type == SCTP_CID_HEARTBEAT) { if (ct->proto.sctp.vtag[dir] == 0) { pr_debug("Setting %d vtag %x for dir %d\n", sch->type, sh->vtag, dir); @@ -474,16 +477,18 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, } /* If it is an INIT or an INIT ACK note down the vtag */ - if (sch->type == SCTP_CID_INIT || - sch->type == SCTP_CID_INIT_ACK) { - struct sctp_inithdr _inithdr, *ih; + if (sch->type == SCTP_CID_INIT) { + struct 
sctp_inithdr _ih, *ih; - ih = skb_header_pointer(skb, offset + sizeof(_sch), - sizeof(_inithdr), &_inithdr); - if (ih == NULL) + ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih); + if (!ih) goto out_unlock; - pr_debug("Setting vtag %x for dir %d\n", - ih->init_tag, !dir); + + if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir]) + ct->proto.sctp.init[!dir] = 0; + ct->proto.sctp.init[dir] = 1; + + pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); ct->proto.sctp.vtag[!dir] = ih->init_tag; /* don't renew timeout on init retransmit so @@ -494,6 +499,24 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, old_state == SCTP_CONNTRACK_CLOSED && nf_ct_is_confirmed(ct)) ignore = true; + } else if (sch->type == SCTP_CID_INIT_ACK) { + struct sctp_inithdr _ih, *ih; + __be32 vtag; + + ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih); + if (!ih) + goto out_unlock; + + vtag = ct->proto.sctp.vtag[!dir]; + if (!ct->proto.sctp.init[!dir] && vtag && vtag != ih->init_tag) + goto out_unlock; + /* collision */ + if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir] && + vtag != ih->init_tag) + goto out_unlock; + + pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); + ct->proto.sctp.vtag[!dir] = ih->init_tag; } ct->proto.sctp.state = new_state; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 52b81dc1fcf5b..5e3dbe2652dbd 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7576,24 +7576,14 @@ static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info, return nft_delobj(&ctx, obj); } -void nft_obj_notify(struct net *net, const struct nft_table *table, - struct nft_object *obj, u32 portid, u32 seq, int event, - u16 flags, int family, int report, gfp_t gfp) +static void +__nft_obj_notify(struct net *net, const struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, int event, + u16 flags, int family, int report, gfp_t gfp) { struct nftables_pernet *nft_net = nft_pernet(net); struct sk_buff *skb; int err; - char *buf = kasprintf(gfp, "%s:%u", - table->name, nft_net->base_seq); - - audit_log_nfcfg(buf, - family, - obj->handle, - event == NFT_MSG_NEWOBJ ? - AUDIT_NFT_OP_OBJ_REGISTER : - AUDIT_NFT_OP_OBJ_UNREGISTER, - gfp); - kfree(buf); if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) @@ -7616,13 +7606,35 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); } + +void nft_obj_notify(struct net *net, const struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, int event, + u16 flags, int family, int report, gfp_t gfp) +{ + struct nftables_pernet *nft_net = nft_pernet(net); + char *buf = kasprintf(gfp, "%s:%u", + table->name, nft_net->base_seq); + + audit_log_nfcfg(buf, + family, + obj->handle, + event == NFT_MSG_NEWOBJ ? 
+ AUDIT_NFT_OP_OBJ_REGISTER : + AUDIT_NFT_OP_OBJ_UNREGISTER, + gfp); + kfree(buf); + + __nft_obj_notify(net, table, obj, portid, seq, event, + flags, family, report, gfp); +} EXPORT_SYMBOL_GPL(nft_obj_notify); static void nf_tables_obj_notify(const struct nft_ctx *ctx, struct nft_object *obj, int event) { - nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event, - ctx->flags, ctx->family, ctx->report, GFP_KERNEL); + __nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, + ctx->seq, event, ctx->flags, ctx->family, + ctx->report, GFP_KERNEL); } /* diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 487572dcd6144..2660ceab3759d 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -233,10 +233,9 @@ static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, rb_erase(&rbe->node, &priv->root); } -static int nft_rbtree_gc_elem(const struct nft_set *__set, - struct nft_rbtree *priv, - struct nft_rbtree_elem *rbe, - u8 genmask) +static const struct nft_rbtree_elem * +nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, + struct nft_rbtree_elem *rbe, u8 genmask) { struct nft_set *set = (struct nft_set *)__set; struct rb_node *prev = rb_prev(&rbe->node); @@ -246,7 +245,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set, gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC); if (!gc) - return -ENOMEM; + return ERR_PTR(-ENOMEM); /* search for end interval coming before this element. * end intervals don't carry a timeout extension, they @@ -261,6 +260,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set, prev = rb_prev(prev); } + rbe_prev = NULL; if (prev) { rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); nft_rbtree_gc_remove(net, set, priv, rbe_prev); @@ -272,7 +272,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set, */ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); if (WARN_ON_ONCE(!gc)) - return -ENOMEM; + return ERR_PTR(-ENOMEM); nft_trans_gc_elem_add(gc, rbe_prev); } @@ -280,13 +280,13 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set, nft_rbtree_gc_remove(net, set, priv, rbe); gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); if (WARN_ON_ONCE(!gc)) - return -ENOMEM; + return ERR_PTR(-ENOMEM); nft_trans_gc_elem_add(gc, rbe); nft_trans_gc_queue_sync_done(gc); - return 0; + return rbe_prev; } static bool nft_rbtree_update_first(const struct nft_set *set, @@ -314,7 +314,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree *priv = nft_set_priv(set); u8 cur_genmask = nft_genmask_cur(net); u8 genmask = nft_genmask_next(net); - int d, err; + int d; /* Descend the tree to search for an existing element greater than the * key value to insert that is greater than the new element. 
This is the @@ -363,9 +363,14 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, */ if (nft_set_elem_expired(&rbe->ext) && nft_set_elem_active(&rbe->ext, cur_genmask)) { - err = nft_rbtree_gc_elem(set, priv, rbe, genmask); - if (err < 0) - return err; + const struct nft_rbtree_elem *removed_end; + + removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask); + if (IS_ERR(removed_end)) + return PTR_ERR(removed_end); + + if (removed_end == rbe_le || removed_end == rbe_ge) + return -EAGAIN; continue; } @@ -486,11 +491,18 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree_elem *rbe = elem->priv; int err; - write_lock_bh(&priv->lock); - write_seqcount_begin(&priv->count); - err = __nft_rbtree_insert(net, set, rbe, ext); - write_seqcount_end(&priv->count); - write_unlock_bh(&priv->lock); + do { + if (fatal_signal_pending(current)) + return -EINTR; + + cond_resched(); + + write_lock_bh(&priv->lock); + write_seqcount_begin(&priv->count); + err = __nft_rbtree_insert(net, set, rbe, ext); + write_seqcount_end(&priv->count); + write_unlock_bh(&priv->lock); + } while (err == -EAGAIN); return err; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 387e430a35ccc..cb833302270a6 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -352,7 +352,7 @@ static void netlink_overrun(struct sock *sk) if (!nlk_test_bit(RECV_NO_ENOBUFS, sk)) { if (!test_and_set_bit(NETLINK_S_CONGESTED, &nlk_sk(sk)->state)) { - sk->sk_err = ENOBUFS; + WRITE_ONCE(sk->sk_err, ENOBUFS); sk_error_report(sk); } } @@ -1566,7 +1566,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) goto out; } - sk->sk_err = p->code; + WRITE_ONCE(sk->sk_err, p->code); sk_error_report(sk); out: return ret; @@ -1955,7 +1955,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { ret = netlink_dump(sk); if (ret) { - sk->sk_err = -ret; + WRITE_ONCE(sk->sk_err, -ret); sk_error_report(sk); } } @@ -2443,19 +2443,24 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, flags |= NLM_F_ACK_TLVS; skb = nlmsg_new(payload + tlvlen, GFP_KERNEL); - if (!skb) { - NETLINK_CB(in_skb).sk->sk_err = ENOBUFS; - sk_error_report(NETLINK_CB(in_skb).sk); - return; - } + if (!skb) + goto err_skb; rep = nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, - NLMSG_ERROR, payload, flags); + NLMSG_ERROR, sizeof(*errmsg), flags); + if (!rep) + goto err_bad_put; errmsg = nlmsg_data(rep); errmsg->error = err; - unsafe_memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) - ? nlh->nlmsg_len : sizeof(*nlh), - /* Bounds checked by the skb layer. 
*/); + errmsg->msg = *nlh; + + if (!(flags & NLM_F_CAPPED)) { + if (!nlmsg_append(skb, nlmsg_len(nlh))) + goto err_bad_put; + + memcpy(nlmsg_data(&errmsg->msg), nlmsg_data(nlh), + nlmsg_len(nlh)); + } if (tlvlen) netlink_ack_tlv_fill(in_skb, skb, nlh, err, extack); @@ -2463,6 +2468,14 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, nlmsg_end(skb, rep); nlmsg_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid); + + return; + +err_bad_put: + nlmsg_free(skb); +err_skb: + WRITE_ONCE(NETLINK_CB(in_skb).sk->sk_err, ENOBUFS); + sk_error_report(NETLINK_CB(in_skb).sk); } EXPORT_SYMBOL(netlink_ack); diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index f60e424e06076..6705bb895e239 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -1636,7 +1636,9 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) timer_setup(&local->sdreq_timer, nfc_llcp_sdreq_timer, 0); INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work); + spin_lock(&llcp_devices_lock); list_add(&local->list, &llcp_devices); + spin_unlock(&llcp_devices_lock); return 0; } diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index f0c477c5d1db4..d788c6d28986f 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -173,7 +173,7 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) * own the socket */ rds_tcp_set_callbacks(sock, cp); - ret = sock->ops->connect(sock, addr, addrlen, O_NONBLOCK); + ret = kernel_connect(sock, addr, addrlen, O_NONBLOCK); rdsdebug("connect to address %pI6c returned %d\n", &conn->c_faddr, ret); if (ret == -EINPROGRESS) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 3460abceba443..2965a12fe8aa2 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1161,8 +1161,7 @@ int sctp_assoc_update(struct sctp_association *asoc, /* Add any peer addresses from the new association. 
*/ list_for_each_entry(trans, &new->peer.transport_addr_list, transports) - if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr) && - !sctp_assoc_add_peer(asoc, &trans->ipaddr, + if (!sctp_assoc_add_peer(asoc, &trans->ipaddr, GFP_ATOMIC, trans->state)) return -ENOMEM; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 32e3669adf146..e25dc17091311 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2449,6 +2449,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, if (trans) { trans->hbinterval = msecs_to_jiffies(params->spp_hbinterval); + sctp_transport_reset_hb_timer(trans); } else if (asoc) { asoc->hbinterval = msecs_to_jiffies(params->spp_hbinterval); diff --git a/net/socket.c b/net/socket.c index d281a7ef4b1d3..b0169168e3f4e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -720,6 +720,14 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) return ret; } +static int __sock_sendmsg(struct socket *sock, struct msghdr *msg) +{ + int err = security_socket_sendmsg(sock, msg, + msg_data_left(msg)); + + return err ?: sock_sendmsg_nosec(sock, msg); +} + /** * sock_sendmsg - send a message through @sock * @sock: socket @@ -730,10 +738,19 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) */ int sock_sendmsg(struct socket *sock, struct msghdr *msg) { - int err = security_socket_sendmsg(sock, msg, - msg_data_left(msg)); + struct sockaddr_storage *save_addr = (struct sockaddr_storage *)msg->msg_name; + struct sockaddr_storage address; + int ret; - return err ?: sock_sendmsg_nosec(sock, msg); + if (msg->msg_name) { + memcpy(&address, msg->msg_name, msg->msg_namelen); + msg->msg_name = &address; + } + + ret = __sock_sendmsg(sock, msg); + msg->msg_name = save_addr; + + return ret; } EXPORT_SYMBOL(sock_sendmsg); @@ -1110,7 +1127,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) if (sock->type == SOCK_SEQPACKET) msg.msg_flags |= MSG_EOR; - res = sock_sendmsg(sock, &msg); + res = __sock_sendmsg(sock, &msg); *from = msg.msg_iter; return res; } @@ -2114,7 +2131,7 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; msg.msg_flags = flags; - err = sock_sendmsg(sock, &msg); + err = __sock_sendmsg(sock, &msg); out_put: fput_light(sock->file, fput_needed); @@ -2479,7 +2496,7 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys, err = sock_sendmsg_nosec(sock, msg_sys); goto out_freectl; } - err = sock_sendmsg(sock, msg_sys); + err = __sock_sendmsg(sock, msg_sys); /* * If this is sendmmsg() and sending to current destination address was * successful, remember it. 
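The sock_sendmsg() change above protects the caller's msg_name by copying it into an on-stack sockaddr_storage and unconditionally restoring the original pointer afterwards, so a lower layer that rewrites the destination address cannot clobber the caller's buffer. A minimal user-space sketch of the same save/restore idiom follows; lower_send() and struct my_msg are illustrative stand-ins, not kernel API, and namelen is assumed to have been validated to fit the bounce buffer:

#include <string.h>
#include <sys/socket.h>

struct my_msg {
	void *name;		/* caller's sockaddr, may be NULL */
	socklen_t namelen;	/* assumed <= sizeof(struct sockaddr_storage) */
};

static int send_protected(struct my_msg *m, int (*lower_send)(struct my_msg *))
{
	struct sockaddr_storage bounce;
	void *saved = m->name;
	int ret;

	/* hand the lower layer a private copy it is free to rewrite */
	if (m->name) {
		memcpy(&bounce, m->name, m->namelen);
		m->name = &bounce;
	}

	ret = lower_send(m);

	/* the caller always gets its own pointer (and bytes) back */
	m->name = saved;
	return ret;
}

As in the patch, the restore is unconditional, so a NULL msg_name round-trips unchanged.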
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index 2b236d95a6469..65f59739a041a 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -1441,14 +1441,14 @@ static int tipc_crypto_key_revoke(struct net *net, u8 tx_key) struct tipc_crypto *tx = tipc_net(net)->crypto_tx; struct tipc_key key; - spin_lock(&tx->lock); + spin_lock_bh(&tx->lock); key = tx->key; WARN_ON(!key.active || tx_key != key.active); /* Free the active key */ tipc_crypto_key_set_state(tx, key.passive, 0, key.pending); tipc_crypto_key_detach(tx->aead[key.active], &tx->lock); - spin_unlock(&tx->lock); + spin_unlock_bh(&tx->lock); pr_warn("%s: key is revoked\n", tx->name); return -EKEYREVOKED; diff --git a/net/wireless/core.c b/net/wireless/core.c index 609b79fe4a748..2c79604672062 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -408,6 +408,34 @@ static void cfg80211_propagate_cac_done_wk(struct work_struct *work) rtnl_unlock(); } +static void cfg80211_wiphy_work(struct work_struct *work) +{ + struct cfg80211_registered_device *rdev; + struct wiphy_work *wk; + + rdev = container_of(work, struct cfg80211_registered_device, wiphy_work); + + wiphy_lock(&rdev->wiphy); + if (rdev->suspended) + goto out; + + spin_lock_irq(&rdev->wiphy_work_lock); + wk = list_first_entry_or_null(&rdev->wiphy_work_list, + struct wiphy_work, entry); + if (wk) { + list_del_init(&wk->entry); + if (!list_empty(&rdev->wiphy_work_list)) + schedule_work(work); + spin_unlock_irq(&rdev->wiphy_work_lock); + + wk->func(&rdev->wiphy, wk); + } else { + spin_unlock_irq(&rdev->wiphy_work_lock); + } +out: + wiphy_unlock(&rdev->wiphy); +} + /* exported functions */ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, @@ -533,6 +561,9 @@ use_default_name: return NULL; } + INIT_WORK(&rdev->wiphy_work, cfg80211_wiphy_work); + INIT_LIST_HEAD(&rdev->wiphy_work_list); + spin_lock_init(&rdev->wiphy_work_lock); INIT_WORK(&rdev->rfkill_block, cfg80211_rfkill_block_work); INIT_WORK(&rdev->conn_work, cfg80211_conn_work); INIT_WORK(&rdev->event_work, cfg80211_event_work); @@ -1011,6 +1042,31 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy) } EXPORT_SYMBOL(wiphy_rfkill_start_polling); +void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev) +{ + unsigned int runaway_limit = 100; + unsigned long flags; + + lockdep_assert_held(&rdev->wiphy.mtx); + + spin_lock_irqsave(&rdev->wiphy_work_lock, flags); + while (!list_empty(&rdev->wiphy_work_list)) { + struct wiphy_work *wk; + + wk = list_first_entry(&rdev->wiphy_work_list, + struct wiphy_work, entry); + list_del_init(&wk->entry); + spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); + + wk->func(&rdev->wiphy, wk); + + spin_lock_irqsave(&rdev->wiphy_work_lock, flags); + if (WARN_ON(--runaway_limit == 0)) + INIT_LIST_HEAD(&rdev->wiphy_work_list); + } + spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); +} + void wiphy_unregister(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); @@ -1049,9 +1105,19 @@ void wiphy_unregister(struct wiphy *wiphy) cfg80211_rdev_list_generation++; device_del(&rdev->wiphy.dev); +#ifdef CONFIG_PM + if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) + rdev_set_wakeup(rdev, false); +#endif + + /* surely nothing is reachable now, clean up work */ + cfg80211_process_wiphy_works(rdev); wiphy_unlock(&rdev->wiphy); rtnl_unlock(); + /* this has nothing to do now but make sure it's gone */ + cancel_work_sync(&rdev->wiphy_work); + flush_work(&rdev->scan_done_wk); cancel_work_sync(&rdev->conn_work); 
flush_work(&rdev->event_work); @@ -1064,10 +1130,6 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->mgmt_registrations_update_wk); flush_work(&rdev->background_cac_abort_wk); -#ifdef CONFIG_PM - if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) - rdev_set_wakeup(rdev, false); -#endif cfg80211_rdev_free_wowlan(rdev); cfg80211_rdev_free_coalesce(rdev); } @@ -1114,16 +1176,11 @@ void wiphy_rfkill_set_hw_state_reason(struct wiphy *wiphy, bool blocked, } EXPORT_SYMBOL(wiphy_rfkill_set_hw_state_reason); -void cfg80211_cqm_config_free(struct wireless_dev *wdev) -{ - kfree(wdev->cqm_config); - wdev->cqm_config = NULL; -} - static void _cfg80211_unregister_wdev(struct wireless_dev *wdev, bool unregister_netdev) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + struct cfg80211_cqm_config *cqm_config; unsigned int link_id; ASSERT_RTNL(); @@ -1162,11 +1219,10 @@ static void _cfg80211_unregister_wdev(struct wireless_dev *wdev, kfree_sensitive(wdev->wext.keys); wdev->wext.keys = NULL; #endif - /* only initialized if we have a netdev */ - if (wdev->netdev) - flush_work(&wdev->disconnect_wk); - - cfg80211_cqm_config_free(wdev); + wiphy_work_cancel(wdev->wiphy, &wdev->cqm_rssi_work); + /* deleted from the list, so can't be found from nl80211 any more */ + cqm_config = rcu_access_pointer(wdev->cqm_config); + kfree_rcu(cqm_config, rcu_head); /* * Ensure that all events have been processed and @@ -1318,6 +1374,8 @@ void cfg80211_init_wdev(struct wireless_dev *wdev) wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; #endif + wiphy_work_init(&wdev->cqm_rssi_work, cfg80211_cqm_rssi_notify_work); + if (wdev->wiphy->flags & WIPHY_FLAG_PS_ON_BY_DEFAULT) wdev->ps = true; else @@ -1439,6 +1497,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, cfg80211_leave(rdev, wdev); cfg80211_remove_links(wdev); wiphy_unlock(&rdev->wiphy); + /* since we just did cfg80211_leave() nothing to do there */ + cancel_work_sync(&wdev->disconnect_wk); break; case NETDEV_DOWN: wiphy_lock(&rdev->wiphy); @@ -1548,6 +1608,66 @@ static struct pernet_operations cfg80211_pernet_ops = { .exit = cfg80211_pernet_exit, }; +void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + unsigned long flags; + + spin_lock_irqsave(&rdev->wiphy_work_lock, flags); + if (list_empty(&work->entry)) + list_add_tail(&work->entry, &rdev->wiphy_work_list); + spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); + + schedule_work(&rdev->wiphy_work); +} +EXPORT_SYMBOL_GPL(wiphy_work_queue); + +void wiphy_work_cancel(struct wiphy *wiphy, struct wiphy_work *work) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + unsigned long flags; + + lockdep_assert_held(&wiphy->mtx); + + spin_lock_irqsave(&rdev->wiphy_work_lock, flags); + if (!list_empty(&work->entry)) + list_del_init(&work->entry); + spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); +} +EXPORT_SYMBOL_GPL(wiphy_work_cancel); + +void wiphy_delayed_work_timer(struct timer_list *t) +{ + struct wiphy_delayed_work *dwork = from_timer(dwork, t, timer); + + wiphy_work_queue(dwork->wiphy, &dwork->work); +} +EXPORT_SYMBOL(wiphy_delayed_work_timer); + +void wiphy_delayed_work_queue(struct wiphy *wiphy, + struct wiphy_delayed_work *dwork, + unsigned long delay) +{ + if (!delay) { + wiphy_work_queue(wiphy, &dwork->work); + return; + } + + dwork->wiphy = wiphy; + mod_timer(&dwork->timer, jiffies + delay); +} 
+EXPORT_SYMBOL_GPL(wiphy_delayed_work_queue); + +void wiphy_delayed_work_cancel(struct wiphy *wiphy, + struct wiphy_delayed_work *dwork) +{ + lockdep_assert_held(&wiphy->mtx); + + del_timer_sync(&dwork->timer); + wiphy_work_cancel(wiphy, &dwork->work); +} +EXPORT_SYMBOL_GPL(wiphy_delayed_work_cancel); + static int __init cfg80211_init(void) { int err; diff --git a/net/wireless/core.h b/net/wireless/core.h index 775e16cb99eda..86fd79912254d 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -108,6 +108,12 @@ struct cfg80211_registered_device { /* lock for all wdev lists */ spinlock_t mgmt_registrations_lock; + struct work_struct wiphy_work; + struct list_head wiphy_work_list; + /* protects the list above */ + spinlock_t wiphy_work_lock; + bool suspended; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); @@ -287,12 +293,17 @@ struct cfg80211_beacon_registration { }; struct cfg80211_cqm_config { + struct rcu_head rcu_head; u32 rssi_hyst; s32 last_rssi_event_value; + enum nl80211_cqm_rssi_threshold_event last_rssi_event_type; int n_rssi_thresholds; s32 rssi_thresholds[]; }; +void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, + struct wiphy_work *work); + void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev); /* free object */ @@ -450,6 +461,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, struct net_device *dev, enum nl80211_iftype ntype, struct vif_params *params); void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); +void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev); void cfg80211_process_wdev_events(struct wireless_dev *wdev); bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range, @@ -556,8 +568,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, #define CFG80211_DEV_WARN_ON(cond) ({bool __r = (cond); __r; }) #endif -void cfg80211_cqm_config_free(struct wireless_dev *wdev); - void cfg80211_release_pmsr(struct wireless_dev *wdev, u32 portid); void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev); void cfg80211_pmsr_free_wk(struct work_struct *work); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 12c7c89d5be1d..1d993a490ac4b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12565,7 +12565,8 @@ static int nl80211_set_cqm_txe(struct genl_info *info, } static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, - struct net_device *dev) + struct net_device *dev, + struct cfg80211_cqm_config *cqm_config) { struct wireless_dev *wdev = dev->ieee80211_ptr; s32 last, low, high; @@ -12574,7 +12575,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, int err; /* RSSI reporting disabled? */ - if (!wdev->cqm_config) + if (!cqm_config) return rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0); /* @@ -12583,7 +12584,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, * connection is established and enough beacons received to calculate * the average. 
*/ - if (!wdev->cqm_config->last_rssi_event_value && + if (!cqm_config->last_rssi_event_value && wdev->links[0].client.current_bss && rdev->ops->get_station) { struct station_info sinfo = {}; @@ -12597,30 +12598,30 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, cfg80211_sinfo_release_content(&sinfo); if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG)) - wdev->cqm_config->last_rssi_event_value = + cqm_config->last_rssi_event_value = (s8) sinfo.rx_beacon_signal_avg; } - last = wdev->cqm_config->last_rssi_event_value; - hyst = wdev->cqm_config->rssi_hyst; - n = wdev->cqm_config->n_rssi_thresholds; + last = cqm_config->last_rssi_event_value; + hyst = cqm_config->rssi_hyst; + n = cqm_config->n_rssi_thresholds; for (i = 0; i < n; i++) { i = array_index_nospec(i, n); - if (last < wdev->cqm_config->rssi_thresholds[i]) + if (last < cqm_config->rssi_thresholds[i]) break; } low_index = i - 1; if (low_index >= 0) { low_index = array_index_nospec(low_index, n); - low = wdev->cqm_config->rssi_thresholds[low_index] - hyst; + low = cqm_config->rssi_thresholds[low_index] - hyst; } else { low = S32_MIN; } if (i < n) { i = array_index_nospec(i, n); - high = wdev->cqm_config->rssi_thresholds[i] + hyst - 1; + high = cqm_config->rssi_thresholds[i] + hyst - 1; } else { high = S32_MAX; } @@ -12633,6 +12634,7 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, u32 hysteresis) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_cqm_config *cqm_config = NULL, *old; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; int i, err; @@ -12650,10 +12652,6 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; - wdev_lock(wdev); - cfg80211_cqm_config_free(wdev); - wdev_unlock(wdev); - if (n_thresholds <= 1 && rdev->ops->set_cqm_rssi_config) { if (n_thresholds == 0 || thresholds[0] == 0) /* Disabling */ return rdev_set_cqm_rssi_config(rdev, dev, 0, 0); @@ -12670,9 +12668,10 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, n_thresholds = 0; wdev_lock(wdev); - if (n_thresholds) { - struct cfg80211_cqm_config *cqm_config; + old = rcu_dereference_protected(wdev->cqm_config, + lockdep_is_held(&wdev->mtx)); + if (n_thresholds) { cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds, n_thresholds), GFP_KERNEL); @@ -12687,11 +12686,18 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, flex_array_size(cqm_config, rssi_thresholds, n_thresholds)); - wdev->cqm_config = cqm_config; + rcu_assign_pointer(wdev->cqm_config, cqm_config); + } else { + RCU_INIT_POINTER(wdev->cqm_config, NULL); } - err = cfg80211_cqm_rssi_update(rdev, dev); - + err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config); + if (err) { + rcu_assign_pointer(wdev->cqm_config, old); + kfree_rcu(cqm_config, rcu_head); + } else { + kfree_rcu(old, rcu_head); + } unlock: wdev_unlock(wdev); @@ -18719,9 +18725,8 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, s32 rssi_level, gfp_t gfp) { - struct sk_buff *msg; struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + struct cfg80211_cqm_config *cqm_config; trace_cfg80211_cqm_rssi_notify(dev, rssi_event, rssi_level); @@ -18729,18 +18734,41 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, rssi_event != NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH)) return; - if (wdev->cqm_config) { - 
wdev->cqm_config->last_rssi_event_value = rssi_level; + rcu_read_lock(); + cqm_config = rcu_dereference(wdev->cqm_config); + if (cqm_config) { + cqm_config->last_rssi_event_value = rssi_level; + cqm_config->last_rssi_event_type = rssi_event; + wiphy_work_queue(wdev->wiphy, &wdev->cqm_rssi_work); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL(cfg80211_cqm_rssi_notify); + +void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work) +{ + struct wireless_dev *wdev = container_of(work, struct wireless_dev, + cqm_rssi_work); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + enum nl80211_cqm_rssi_threshold_event rssi_event; + struct cfg80211_cqm_config *cqm_config; + struct sk_buff *msg; + s32 rssi_level; - cfg80211_cqm_rssi_update(rdev, dev); + wdev_lock(wdev); + cqm_config = rcu_dereference_protected(wdev->cqm_config, + lockdep_is_held(&wdev->mtx)); + if (!wdev->cqm_config) + goto unlock; - if (rssi_level == 0) - rssi_level = wdev->cqm_config->last_rssi_event_value; - } + cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config); - msg = cfg80211_prepare_cqm(dev, NULL, gfp); + rssi_level = cqm_config->last_rssi_event_value; + rssi_event = cqm_config->last_rssi_event_type; + + msg = cfg80211_prepare_cqm(wdev->netdev, NULL, GFP_KERNEL); if (!msg) - return; + goto unlock; if (nla_put_u32(msg, NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT, rssi_event)) @@ -18750,14 +18778,15 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, rssi_level)) goto nla_put_failure; - cfg80211_send_cqm(msg, gfp); + cfg80211_send_cqm(msg, GFP_KERNEL); - return; + goto unlock; nla_put_failure: nlmsg_free(msg); + unlock: + wdev_unlock(wdev); } -EXPORT_SYMBOL(cfg80211_cqm_rssi_notify); void cfg80211_cqm_txe_notify(struct net_device *dev, const u8 *peer, u32 num_packets, diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 6e87d2cd83456..b97834284baef 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -5,7 +5,7 @@ * (for nl80211's connect() and wext) * * Copyright 2009 Johannes Berg - * Copyright (C) 2009, 2020, 2022 Intel Corporation. All rights reserved. + * Copyright (C) 2009, 2020, 2022-2023 Intel Corporation. All rights reserved. 
* Copyright 2017 Intel Deutschland GmbH */ @@ -1555,6 +1555,7 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) container_of(work, struct wireless_dev, disconnect_wk); struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + wiphy_lock(wdev->wiphy); wdev_lock(wdev); if (wdev->conn_owner_nlportid) { @@ -1593,4 +1594,5 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) } wdev_unlock(wdev); + wiphy_unlock(wdev->wiphy); } diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 0c3f05c9be27a..4d3b658030105 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -5,7 +5,7 @@ * * Copyright 2005-2006 Jiri Benc * Copyright 2006 Johannes Berg - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2021, 2023 Intel Corporation */ #include @@ -105,14 +105,18 @@ static int wiphy_suspend(struct device *dev) cfg80211_leave_all(rdev); cfg80211_process_rdev_events(rdev); } + cfg80211_process_wiphy_works(rdev); if (rdev->ops->suspend) ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config); if (ret == 1) { /* Driver refuse to configure wowlan */ cfg80211_leave_all(rdev); cfg80211_process_rdev_events(rdev); + cfg80211_process_wiphy_works(rdev); ret = rdev_suspend(rdev, NULL); } + if (ret == 0) + rdev->suspended = true; } wiphy_unlock(&rdev->wiphy); rtnl_unlock(); @@ -132,6 +136,8 @@ static int wiphy_resume(struct device *dev) wiphy_lock(&rdev->wiphy); if (rdev->wiphy.registered && rdev->ops->resume) ret = rdev_resume(rdev); + rdev->suspended = false; + schedule_work(&rdev->wiphy_work); wiphy_unlock(&rdev->wiphy); if (ret) diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 80d973144fded..111d5464c12df 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -1577,7 +1577,7 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, /* First handle the "special" cases */ if (sym_is(name, namelen, "usb")) do_usb_table(symval, sym->st_size, mod); - if (sym_is(name, namelen, "of")) + else if (sym_is(name, namelen, "of")) do_of_table(symval, sym->st_size, mod); else if (sym_is(name, namelen, "pnp")) do_pnp_device_entry(symval, sym->st_size, mod); diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig index c17660bf5f347..6ef7bde551263 100644 --- a/security/integrity/ima/Kconfig +++ b/security/integrity/ima/Kconfig @@ -29,9 +29,11 @@ config IMA to learn more about IMA. If unsure, say N. +if IMA + config IMA_KEXEC bool "Enable carrying the IMA measurement list across a soft boot" - depends on IMA && TCG_TPM && HAVE_IMA_KEXEC + depends on TCG_TPM && HAVE_IMA_KEXEC default n help TPM PCRs are only reset on a hard reboot. In order to validate @@ -43,7 +45,6 @@ config IMA_KEXEC config IMA_MEASURE_PCR_IDX int - depends on IMA range 8 14 default 10 help @@ -53,7 +54,7 @@ config IMA_MEASURE_PCR_IDX config IMA_LSM_RULES bool - depends on IMA && AUDIT && (SECURITY_SELINUX || SECURITY_SMACK || SECURITY_APPARMOR) + depends on AUDIT && (SECURITY_SELINUX || SECURITY_SMACK || SECURITY_APPARMOR) default y help Disabling this option will disregard LSM based policy rules. @@ -61,7 +62,6 @@ config IMA_LSM_RULES choice prompt "Default template" default IMA_NG_TEMPLATE - depends on IMA help Select the default IMA measurement template. 
@@ -80,14 +80,12 @@ endchoice config IMA_DEFAULT_TEMPLATE string - depends on IMA default "ima-ng" if IMA_NG_TEMPLATE default "ima-sig" if IMA_SIG_TEMPLATE choice prompt "Default integrity hash algorithm" default IMA_DEFAULT_HASH_SHA1 - depends on IMA help Select the default hash algorithm used for the measurement list, integrity appraisal and audit log. The compiled default @@ -117,7 +115,6 @@ endchoice config IMA_DEFAULT_HASH string - depends on IMA default "sha1" if IMA_DEFAULT_HASH_SHA1 default "sha256" if IMA_DEFAULT_HASH_SHA256 default "sha512" if IMA_DEFAULT_HASH_SHA512 @@ -126,7 +123,6 @@ config IMA_DEFAULT_HASH config IMA_WRITE_POLICY bool "Enable multiple writes to the IMA policy" - depends on IMA default n help IMA policy can now be updated multiple times. The new rules get @@ -137,7 +133,6 @@ config IMA_WRITE_POLICY config IMA_READ_POLICY bool "Enable reading back the current IMA policy" - depends on IMA default y if IMA_WRITE_POLICY default n if !IMA_WRITE_POLICY help @@ -147,7 +142,6 @@ config IMA_READ_POLICY config IMA_APPRAISE bool "Appraise integrity measurements" - depends on IMA default n help This option enables local measurement integrity appraisal. @@ -268,7 +262,7 @@ config IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY config IMA_BLACKLIST_KEYRING bool "Create IMA machine owner blacklist keyrings (EXPERIMENTAL)" depends on SYSTEM_TRUSTED_KEYRING - depends on IMA_TRUSTED_KEYRING + depends on INTEGRITY_TRUSTED_KEYRING default n help This option creates an IMA blacklist keyring, which contains all @@ -278,7 +272,7 @@ config IMA_BLACKLIST_KEYRING config IMA_LOAD_X509 bool "Load X509 certificate onto the '.ima' trusted keyring" - depends on IMA_TRUSTED_KEYRING + depends on INTEGRITY_TRUSTED_KEYRING default n help File signature verification is based on the public keys @@ -303,7 +297,6 @@ config IMA_APPRAISE_SIGNED_INIT config IMA_MEASURE_ASYMMETRIC_KEYS bool - depends on IMA depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y default y @@ -322,7 +315,8 @@ config IMA_SECURE_AND_OR_TRUSTED_BOOT config IMA_DISABLE_HTABLE bool "Disable htable to allow measurement of duplicate records" - depends on IMA default n help This option disables htable to allow measurement of duplicate records. 
+ +endif diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c index a4dba0b751e76..1bbd1d077dfd9 100644 --- a/sound/soc/soc-utils.c +++ b/sound/soc/soc-utils.c @@ -217,6 +217,7 @@ int snd_soc_dai_is_dummy(struct snd_soc_dai *dai) return 1; return 0; } +EXPORT_SYMBOL_GPL(snd_soc_dai_is_dummy); int snd_soc_component_is_dummy(struct snd_soc_component *component) { diff --git a/sound/soc/tegra/tegra_audio_graph_card.c b/sound/soc/tegra/tegra_audio_graph_card.c index 1f2c5018bf5ac..4737e776d3837 100644 --- a/sound/soc/tegra/tegra_audio_graph_card.c +++ b/sound/soc/tegra/tegra_audio_graph_card.c @@ -10,6 +10,7 @@ #include #include #include +#include #define MAX_PLLA_OUT0_DIV 128 @@ -44,6 +45,21 @@ struct tegra_audio_cdata { unsigned int plla_out0_rates[NUM_RATE_TYPE]; }; +static bool need_clk_update(struct snd_soc_dai *dai) +{ + if (snd_soc_dai_is_dummy(dai) || + !dai->driver->ops || + !dai->driver->name) + return false; + + if (strstr(dai->driver->name, "I2S") || + strstr(dai->driver->name, "DMIC") || + strstr(dai->driver->name, "DSPK")) + return true; + + return false; +} + /* Setup PLL clock as per the given sample rate */ static int tegra_audio_graph_update_pll(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) @@ -140,19 +156,7 @@ static int tegra_audio_graph_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); int err; - /* - * This gets called for each DAI link (FE or BE) when DPCM is used. - * We may not want to update PLLA rate for each call. So PLLA update - * must be restricted to external I/O links (I2S, DMIC or DSPK) since - * they actually depend on it. I/O modules update their clocks in - * hw_param() of their respective component driver and PLLA rate - * update here helps them to derive appropriate rates. - * - * TODO: When more HW accelerators get added (like sample rate - * converter, volume gain controller etc., which don't really - * depend on PLLA) we need a better way to filter here. - */ - if (cpu_dai->driver->ops && rtd->dai_link->no_pcm) { + if (need_clk_update(cpu_dai)) { err = tegra_audio_graph_update_pll(substream, params); if (err) return err; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 53bc487947197..92dbe89dafbf5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3112,6 +3112,11 @@ union bpf_attr { * **BPF_FIB_LOOKUP_OUTPUT** * Perform lookup from an egress perspective (default is * ingress). + * **BPF_FIB_LOOKUP_SKIP_NEIGH** + * Skip the neighbour table lookup. *params*->dmac + * and *params*->smac will not be set as output. A common + * use case is to call **bpf_redirect_neigh**\ () after + * doing **bpf_fib_lookup**\ (). * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. 
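The helper documentation added above describes the intended pairing: perform the route lookup with BPF_FIB_LOOKUP_SKIP_NEIGH, then let bpf_redirect_neigh() resolve the L2 addresses on the redirect path. A hedged sketch of that pattern for a tc classifier; the address and length setup is deliberately elided, and everything outside the two helper calls is an assumption rather than something this patch defines:

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int fwd_skip_neigh(struct __sk_buff *skb)
{
	struct bpf_fib_lookup params = {};
	long rc;

	params.family  = 2;			/* AF_INET */
	params.ifindex = skb->ingress_ifindex;
	/* a real program must also fill the IP addresses and tot_len */

	rc = bpf_fib_lookup(skb, &params, sizeof(params),
			    BPF_FIB_LOOKUP_SKIP_NEIGH);
	if (rc != BPF_FIB_LKUP_RET_SUCCESS)
		return TC_ACT_OK;		/* fall back to the stack */

	/* dmac/smac were intentionally not filled in; the neighbour
	 * subsystem resolves them when the redirect is executed
	 */
	return bpf_redirect_neigh(params.ifindex, NULL, 0, 0);
}

char _license[] SEC("license") = "GPL";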
@@ -6678,6 +6683,7 @@ struct bpf_raw_tracepoint_args { enum { BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_OUTPUT = (1U << 1), + BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), }; enum { diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore index 4cb887b574138..4b2928e1c19d8 100644 --- a/tools/testing/selftests/netfilter/.gitignore +++ b/tools/testing/selftests/netfilter/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only nf-queue connect_close +audit_logread diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 3686bfa6c58d7..321db8850da00 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -6,13 +6,13 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ - conntrack_vrf.sh nft_synproxy.sh rpath.sh + conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh HOSTPKG_CONFIG := pkg-config CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) -TEST_GEN_FILES = nf-queue connect_close +TEST_GEN_FILES = nf-queue connect_close audit_logread include ../lib.mk diff --git a/tools/testing/selftests/netfilter/audit_logread.c b/tools/testing/selftests/netfilter/audit_logread.c new file mode 100644 index 0000000000000..a0a880fc2d9de --- /dev/null +++ b/tools/testing/selftests/netfilter/audit_logread.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int fd; + +#define MAX_AUDIT_MESSAGE_LENGTH 8970 +struct audit_message { + struct nlmsghdr nlh; + union { + struct audit_status s; + char data[MAX_AUDIT_MESSAGE_LENGTH]; + } u; +}; + +int audit_recv(int fd, struct audit_message *rep) +{ + struct sockaddr_nl addr; + socklen_t addrlen = sizeof(addr); + int ret; + + do { + ret = recvfrom(fd, rep, sizeof(*rep), 0, + (struct sockaddr *)&addr, &addrlen); + } while (ret < 0 && errno == EINTR); + + if (ret < 0 || + addrlen != sizeof(addr) || + addr.nl_pid != 0 || + rep->nlh.nlmsg_type == NLMSG_ERROR) /* short-cut for now */ + return -1; + + return ret; +} + +int audit_send(int fd, uint16_t type, uint32_t key, uint32_t val) +{ + static int seq = 0; + struct audit_message msg = { + .nlh = { + .nlmsg_len = NLMSG_SPACE(sizeof(msg.u.s)), + .nlmsg_type = type, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + .nlmsg_seq = ++seq, + }, + .u.s = { + .mask = key, + .enabled = key == AUDIT_STATUS_ENABLED ? val : 0, + .pid = key == AUDIT_STATUS_PID ? 
val : 0, + } + }; + struct sockaddr_nl addr = { + .nl_family = AF_NETLINK, + }; + int ret; + + do { + ret = sendto(fd, &msg, msg.nlh.nlmsg_len, 0, + (struct sockaddr *)&addr, sizeof(addr)); + } while (ret < 0 && errno == EINTR); + + if (ret != (int)msg.nlh.nlmsg_len) + return -1; + return 0; +} + +int audit_set(int fd, uint32_t key, uint32_t val) +{ + struct audit_message rep = { 0 }; + int ret; + + ret = audit_send(fd, AUDIT_SET, key, val); + if (ret) + return ret; + + ret = audit_recv(fd, &rep); + if (ret < 0) + return ret; + return 0; +} + +int readlog(int fd) +{ + struct audit_message rep = { 0 }; + int ret = audit_recv(fd, &rep); + const char *sep = ""; + char *k, *v; + + if (ret < 0) + return ret; + + if (rep.nlh.nlmsg_type != AUDIT_NETFILTER_CFG) + return 0; + + /* skip the initial "audit(...): " part */ + strtok(rep.u.data, " "); + + while ((k = strtok(NULL, "="))) { + v = strtok(NULL, " "); + + /* these vary and/or are uninteresting, ignore */ + if (!strcmp(k, "pid") || + !strcmp(k, "comm") || + !strcmp(k, "subj")) + continue; + + /* strip the varying sequence number */ + if (!strcmp(k, "table")) + *strchrnul(v, ':') = '\0'; + + printf("%s%s=%s", sep, k, v); + sep = " "; + } + if (*sep) { + printf("\n"); + fflush(stdout); + } + return 0; +} + +void cleanup(int sig) +{ + audit_set(fd, AUDIT_STATUS_ENABLED, 0); + close(fd); + if (sig) + exit(0); +} + +int main(int argc, char **argv) +{ + struct sigaction act = { + .sa_handler = cleanup, + }; + + fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT); + if (fd < 0) { + perror("Can't open netlink socket"); + return -1; + } + + if (sigaction(SIGTERM, &act, NULL) < 0 || + sigaction(SIGINT, &act, NULL) < 0) { + perror("Can't set signal handler"); + close(fd); + return -1; + } + + audit_set(fd, AUDIT_STATUS_ENABLED, 1); + audit_set(fd, AUDIT_STATUS_PID, getpid()); + + while (1) + readlog(fd); +} diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config index 4faf2ce021d90..7c42b1b2c69b4 100644 --- a/tools/testing/selftests/netfilter/config +++ b/tools/testing/selftests/netfilter/config @@ -6,3 +6,4 @@ CONFIG_NFT_REDIR=m CONFIG_NFT_MASQ=m CONFIG_NFT_FLOW_OFFLOAD=m CONFIG_NF_CT_NETLINK=m +CONFIG_AUDIT=y diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/netfilter/nft_audit.sh new file mode 100755 index 0000000000000..bb34329e02a7f --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_audit.sh @@ -0,0 +1,193 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Check that audit logs generated for nft commands are as expected. + +SKIP_RC=4 +RC=0 + +nft --version >/dev/null 2>&1 || { + echo "SKIP: missing nft tool" + exit $SKIP_RC +} + +logfile=$(mktemp) +rulefile=$(mktemp) +echo "logging into $logfile" +./audit_logread >"$logfile" & +logread_pid=$! +trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT +exec 3<"$logfile" + +do_test() { # (cmd, log) + echo -n "testing for cmd: $1 ... " + cat <&3 >/dev/null + $1 >/dev/null || exit 1 + sleep 0.1 + res=$(diff -a -u <(echo "$2") - <&3) + [ $? 
-eq 0 ] && { echo "OK"; return; } + echo "FAIL" + grep -v '^\(---\|+++\|@@\)' <<< "$res" + ((RC--)) +} + +nft flush ruleset + +# adding tables, chains and rules + +for table in t1 t2; do + do_test "nft add table $table" \ + "table=$table family=2 entries=1 op=nft_register_table" + + do_test "nft add chain $table c1" \ + "table=$table family=2 entries=1 op=nft_register_chain" + + do_test "nft add chain $table c2; add chain $table c3" \ + "table=$table family=2 entries=2 op=nft_register_chain" + + cmd="add rule $table c1 counter" + + do_test "nft $cmd" \ + "table=$table family=2 entries=1 op=nft_register_rule" + + do_test "nft $cmd; $cmd" \ + "table=$table family=2 entries=2 op=nft_register_rule" + + cmd="" + sep="" + for chain in c2 c3; do + for i in {1..3}; do + cmd+="$sep add rule $table $chain counter" + sep=";" + done + done + do_test "nft $cmd" \ + "table=$table family=2 entries=6 op=nft_register_rule" +done + +for ((i = 0; i < 500; i++)); do + echo "add rule t2 c3 counter accept comment \"rule $i\"" +done >$rulefile +do_test "nft -f $rulefile" \ +'table=t2 family=2 entries=500 op=nft_register_rule' + +# adding sets and elements + +settype='type inet_service; counter' +setelem='{ 22, 80, 443 }' +setblock="{ $settype; elements = $setelem; }" +do_test "nft add set t1 s $setblock" \ +"table=t1 family=2 entries=4 op=nft_register_set" + +do_test "nft add set t1 s2 $setblock; add set t1 s3 { $settype; }" \ +"table=t1 family=2 entries=5 op=nft_register_set" + +do_test "nft add element t1 s3 $setelem" \ +"table=t1 family=2 entries=3 op=nft_register_setelem" + +# adding counters + +do_test 'nft add counter t1 c1' \ +'table=t1 family=2 entries=1 op=nft_register_obj' + +do_test 'nft add counter t2 c1; add counter t2 c2' \ +'table=t2 family=2 entries=2 op=nft_register_obj' + +# adding/updating quotas + +do_test 'nft add quota t1 q1 { 10 bytes }' \ +'table=t1 family=2 entries=1 op=nft_register_obj' + +do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \ +'table=t2 family=2 entries=2 op=nft_register_obj' + +# changing the quota value triggers obj update path +do_test 'nft add quota t1 q1 { 20 bytes }' \ +'table=t1 family=2 entries=1 op=nft_register_obj' + +# resetting rules + +do_test 'nft reset rules t1 c2' \ +'table=t1 family=2 entries=3 op=nft_reset_rule' + +do_test 'nft reset rules table t1' \ +'table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule' + +do_test 'nft reset rules t2 c3' \ +'table=t2 family=2 entries=189 op=nft_reset_rule +table=t2 family=2 entries=188 op=nft_reset_rule +table=t2 family=2 entries=126 op=nft_reset_rule' + +do_test 'nft reset rules t2' \ +'table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=186 op=nft_reset_rule +table=t2 family=2 entries=188 op=nft_reset_rule +table=t2 family=2 entries=129 op=nft_reset_rule' + +do_test 'nft reset rules' \ +'table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=180 op=nft_reset_rule +table=t2 family=2 entries=188 op=nft_reset_rule +table=t2 family=2 entries=135 op=nft_reset_rule' + +# resetting sets and elements + +elem=(22 ,80 ,443) +relem="" +for i in {1..3}; do + relem+="${elem[((i - 1))]}" + do_test "nft reset element t1 s { $relem }" \ + "table=t1 
family=2 entries=$i op=nft_reset_setelem" +done + +do_test 'nft reset set t1 s' \ +'table=t1 family=2 entries=3 op=nft_reset_setelem' + +# deleting rules + +readarray -t handles < <(nft -a list chain t1 c1 | \ + sed -n 's/.*counter.* handle \(.*\)$/\1/p') + +do_test "nft delete rule t1 c1 handle ${handles[0]}" \ +'table=t1 family=2 entries=1 op=nft_unregister_rule' + +cmd='delete rule t1 c1 handle' +do_test "nft $cmd ${handles[1]}; $cmd ${handles[2]}" \ +'table=t1 family=2 entries=2 op=nft_unregister_rule' + +do_test 'nft flush chain t1 c2' \ +'table=t1 family=2 entries=3 op=nft_unregister_rule' + +do_test 'nft flush table t2' \ +'table=t2 family=2 entries=509 op=nft_unregister_rule' + +# deleting chains + +do_test 'nft delete chain t2 c2' \ +'table=t2 family=2 entries=1 op=nft_unregister_chain' + +# deleting sets and elements + +do_test 'nft delete element t1 s { 22 }' \ +'table=t1 family=2 entries=1 op=nft_unregister_setelem' + +do_test 'nft delete element t1 s { 80, 443 }' \ +'table=t1 family=2 entries=2 op=nft_unregister_setelem' + +do_test 'nft flush set t1 s2' \ +'table=t1 family=2 entries=3 op=nft_unregister_setelem' + +do_test 'nft delete set t1 s2' \ +'table=t1 family=2 entries=1 op=nft_unregister_set' + +do_test 'nft delete set t1 s3' \ +'table=t1 family=2 entries=1 op=nft_unregister_set' + +exit $RC
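The harness above flags each mismatch by decrementing RC, so any nonzero exit status marks a failing run. The records that audit_logread prints are already normalized (one key=value line per AUDIT_NETFILTER_CFG event, with pid/comm/subj and the table generation suffix stripped), so they can also be consumed outside the shell harness. A minimal sketch, assuming the helper's stdout was redirected to a file named "log" and reusing an expected record quoted verbatim from the script:

#include <stdio.h>
#include <string.h>

/* read the next normalized record and compare; 0 means it matched */
static int expect_record(FILE *log, const char *want)
{
	char line[512];		/* buffer size is an arbitrary assumption */

	if (!fgets(line, sizeof(line), log))
		return -1;
	line[strcspn(line, "\n")] = '\0';	/* strip the newline */
	return strcmp(line, want) ? -1 : 0;
}

int main(void)
{
	FILE *log = fopen("log", "r");	/* e.g. ./audit_logread >log & */

	if (!log)
		return 1;
	/* mirrors the first do_test case: one new table registered */
	return expect_record(log,
		"table=t1 family=2 entries=1 op=nft_register_table") ? 1 : 0;
}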