diff --git a/Documentation/robust-futexes.txt b/Documentation/robust-futexes.txt index 6c42c75103eb..6361fb01c9c1 100644 --- a/Documentation/robust-futexes.txt +++ b/Documentation/robust-futexes.txt @@ -218,5 +218,4 @@ All other architectures should build just fine too - but they won't have the new syscalls yet. Architectures need to implement the new futex_atomic_cmpxchg_inatomic() -inline function before writing up the syscalls (that function returns --ENOSYS right now). +inline function before writing up the syscalls. diff --git a/Makefile b/Makefile index d7b3c8e3ff3e..46a0ae537182 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 1 -SUBLEVEL = 15 +SUBLEVEL = 16 EXTRAVERSION = NAME = Shy Crocodile diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 8fbd583b18e1..e9d2e578cbe6 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -51,7 +51,7 @@ endif KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -KBUILD_CFLAGS += -Wno-psabi +KBUILD_CFLAGS += $(call cc-disable-warning, psabi) KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 2d78ea6932b7..bdb3c05070a2 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -134,7 +134,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, : "memory"); uaccess_disable(); - *uval = val; + if (!ret) + *uval = val; + return ret; } diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 9c01f04db64d..f71b84d9f294 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -277,6 +277,7 @@ __AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000) __AARCH64_INSN_FUNCS(prfm, 0x3FC00000, 0x39800000) __AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) +__AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) @@ -394,6 +395,13 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, enum aarch64_insn_register state, enum aarch64_insn_size_type size, enum aarch64_insn_ldst_type type); +u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size); +u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size); u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, enum aarch64_insn_register src, int imm, enum aarch64_insn_variant variant, diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 7820a4a688fa..9e2b5882cdeb 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -734,6 +734,46 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, state); } +u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size) +{ + u32 insn = aarch64_insn_get_ldadd_value(); + + switch (size) { + case AARCH64_INSN_SIZE_32: + case AARCH64_INSN_SIZE_64: + break; + default: + pr_err("%s: unimplemented size encoding %d\n", __func__, size); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_encode_ldst_size(size, insn); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, + result); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, + address); + + return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn, + value); +} + +u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size) +{ + /* + * STADD is simply encoded as an alias for LDADD with XZR as + * the destination register. + */ + return aarch64_insn_gen_ldadd(AARCH64_INSN_REG_ZR, address, + value, size); +} + static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type, enum aarch64_insn_prfm_target target, enum aarch64_insn_prfm_policy policy, diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index 6c881659ee8a..76606e87233f 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -100,6 +100,10 @@ #define A64_STXR(sf, Rt, Rn, Rs) \ A64_LSX(sf, Rt, Rn, Rs, STORE_EX) +/* LSE atomics */ +#define A64_STADD(sf, Rn, Rs) \ + aarch64_insn_gen_stadd(Rn, Rs, A64_SIZE(sf)) + /* Add/subtract (immediate) */ #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \ aarch64_insn_gen_add_sub_imm(Rd, Rn, imm12, \ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index a1420626fca2..df845cee438e 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -365,7 +365,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, const bool is64 = BPF_CLASS(code) == BPF_ALU64 || BPF_CLASS(code) == BPF_JMP; const bool isdw = BPF_SIZE(code) == BPF_DW; - u8 jmp_cond; + u8 jmp_cond, reg; s32 jmp_offset; #define check_imm(bits, imm) do { \ @@ -756,18 +756,28 @@ emit_cond_jmp: break; } break; + /* STX XADD: lock *(u32 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_W: /* STX XADD: lock *(u64 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_DW: - emit_a64_mov_i(1, tmp, off, ctx); - emit(A64_ADD(1, tmp, tmp, dst), ctx); - emit(A64_LDXR(isdw, tmp2, tmp), ctx); - emit(A64_ADD(isdw, tmp2, tmp2, src), ctx); - emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx); - jmp_offset = -3; - check_imm19(jmp_offset); - emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); + if (!off) { + reg = dst; + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_ADD(1, tmp, tmp, dst), ctx); + reg = tmp; + } + if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) { + emit(A64_STADD(isdw, reg, src), ctx); + } else { + emit(A64_LDXR(isdw, tmp2, reg), ctx); + emit(A64_ADD(isdw, tmp2, tmp2, src), ctx); + emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx); + jmp_offset = -3; + check_imm19(jmp_offset); + emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); + } break; default: diff --git a/arch/mips/include/asm/mips-gic.h b/arch/mips/include/asm/mips-gic.h index 558059a8f218..0277b56157af 100644 --- a/arch/mips/include/asm/mips-gic.h +++ b/arch/mips/include/asm/mips-gic.h @@ -314,6 +314,36 @@ static inline bool mips_gic_present(void) return IS_ENABLED(CONFIG_MIPS_GIC) && mips_gic_base; } +/** + * mips_gic_vx_map_reg() - Return GIC_Vx__MAP register offset + * @intr: A GIC local interrupt + * + * Determine the index of the GIC_VL__MAP or GIC_VO__MAP register + * within the block of GIC map registers. This is almost the same as the order + * of interrupts in the pending & mask registers, as used by enum + * mips_gic_local_interrupt, but moves the FDC interrupt & thus offsets the + * interrupts after it... + * + * Return: The map register index corresponding to @intr. + * + * The return value is suitable for use with the (read|write)_gic_v[lo]_map + * accessor functions. + */ +static inline unsigned int +mips_gic_vx_map_reg(enum mips_gic_local_interrupt intr) +{ + /* WD, Compare & Timer are 1:1 */ + if (intr <= GIC_LOCAL_INT_TIMER) + return intr; + + /* FDC moves to after Timer... */ + if (intr == GIC_LOCAL_INT_FDC) + return GIC_LOCAL_INT_TIMER + 1; + + /* As a result everything else is offset by 1 */ + return intr + 1; +} + /** * gic_get_c0_compare_int() - Return cp0 count/compare interrupt virq * diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 03b4cc0ec3a7..66ca906aa790 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -835,6 +835,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) break; } + /* + * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper + * bit in the mask to allow guests to use the mitigation even in the + * case where the host does not enable it. + */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; + } + /* * We have three CPU feature flags that are in play here: * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. @@ -852,7 +862,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); } } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index f53658dde639..dfc36cd56676 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -793,13 +793,16 @@ static struct syscore_ops mc_syscore_ops = { .resume = mc_bp_resume, }; -static int mc_cpu_online(unsigned int cpu) +static int mc_cpu_starting(unsigned int cpu) { - struct device *dev; - - dev = get_cpu_device(cpu); microcode_update_cpu(cpu); pr_debug("CPU%d added\n", cpu); + return 0; +} + +static int mc_cpu_online(unsigned int cpu) +{ + struct device *dev = get_cpu_device(cpu); if (sysfs_create_group(&dev->kobj, &mc_attr_group)) pr_err("Failed to create group for CPU%d\n", cpu); @@ -876,7 +879,9 @@ int __init microcode_init(void) goto out_ucode_group; register_syscore_ops(&mc_syscore_ops); - cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online", + cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting", + mc_cpu_starting, NULL); + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online", mc_cpu_online, mc_cpu_down_prep); pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index c51b56e29948..f70a617b31b0 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -804,8 +804,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { struct rdt_resource *r = of->kn->parent->priv; - u32 sw_shareable = 0, hw_shareable = 0; - u32 exclusive = 0, pseudo_locked = 0; + /* + * Use unsigned long even though only 32 bits are used to ensure + * test_bit() is used safely. + */ + unsigned long sw_shareable = 0, hw_shareable = 0; + unsigned long exclusive = 0, pseudo_locked = 0; struct rdt_domain *dom; int i, hwb, swb, excl, psl; enum rdtgrp_mode mode; @@ -850,10 +854,10 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, } for (i = r->cache.cbm_len - 1; i >= 0; i--) { pseudo_locked = dom->plr ? dom->plr->cbm : 0; - hwb = test_bit(i, (unsigned long *)&hw_shareable); - swb = test_bit(i, (unsigned long *)&sw_shareable); - excl = test_bit(i, (unsigned long *)&exclusive); - psl = test_bit(i, (unsigned long *)&pseudo_locked); + hwb = test_bit(i, &hw_shareable); + swb = test_bit(i, &sw_shareable); + excl = test_bit(i, &exclusive); + psl = test_bit(i, &pseudo_locked); if (hwb && swb) seq_putc(seq, 'X'); else if (hwb && !swb) @@ -2494,26 +2498,19 @@ out_destroy: */ static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r) { - /* - * Convert the u32 _val to an unsigned long required by all the bit - * operations within this function. No more than 32 bits of this - * converted value can be accessed because all bit operations are - * additionally provided with cbm_len that is initialized during - * hardware enumeration using five bits from the EAX register and - * thus never can exceed 32 bits. - */ - unsigned long *val = (unsigned long *)_val; + unsigned long val = *_val; unsigned int cbm_len = r->cache.cbm_len; unsigned long first_bit, zero_bit; - if (*val == 0) + if (val == 0) return; - first_bit = find_first_bit(val, cbm_len); - zero_bit = find_next_zero_bit(val, cbm_len, first_bit); + first_bit = find_first_bit(&val, cbm_len); + zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); /* Clear any remaining bits to ensure contiguous region */ - bitmap_clear(val, zero_bit, cbm_len - zero_bit); + bitmap_clear(&val, zero_bit, cbm_len - zero_bit); + *_val = (u32)val; } /** diff --git a/drivers/clk/socfpga/clk-s10.c b/drivers/clk/socfpga/clk-s10.c index 8281dfbf38c2..5bed36e12951 100644 --- a/drivers/clk/socfpga/clk-s10.c +++ b/drivers/clk/socfpga/clk-s10.c @@ -103,9 +103,9 @@ static const struct stratix10_perip_cnt_clock s10_main_perip_cnt_clks[] = { { STRATIX10_NOC_CLK, "noc_clk", NULL, noc_mux, ARRAY_SIZE(noc_mux), 0, 0, 0, 0x3C, 1}, { STRATIX10_EMAC_A_FREE_CLK, "emaca_free_clk", NULL, emaca_free_mux, ARRAY_SIZE(emaca_free_mux), - 0, 0, 4, 0xB0, 0}, + 0, 0, 2, 0xB0, 0}, { STRATIX10_EMAC_B_FREE_CLK, "emacb_free_clk", NULL, emacb_free_mux, ARRAY_SIZE(emacb_free_mux), - 0, 0, 4, 0xB0, 1}, + 0, 0, 2, 0xB0, 1}, { STRATIX10_EMAC_PTP_FREE_CLK, "emac_ptp_free_clk", NULL, emac_ptp_free_mux, ARRAY_SIZE(emac_ptp_free_mux), 0, 0, 4, 0xB0, 2}, { STRATIX10_GPIO_DB_FREE_CLK, "gpio_db_free_clk", NULL, gpio_db_free_mux, diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c index 7545af763d7a..af4ace8f7369 100644 --- a/drivers/clk/tegra/clk-tegra210.c +++ b/drivers/clk/tegra/clk-tegra210.c @@ -3377,6 +3377,8 @@ static struct tegra_clk_init_table init_table[] __initdata = { { TEGRA210_CLK_I2S3_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 }, { TEGRA210_CLK_I2S4_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 }, { TEGRA210_CLK_VIMCLK_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 }, + { TEGRA210_CLK_HDA, TEGRA210_CLK_PLL_P, 51000000, 0 }, + { TEGRA210_CLK_HDA2CODEC_2X, TEGRA210_CLK_PLL_P, 48000000, 0 }, /* This MUST be the last entry. */ { TEGRA210_CLK_CLK_MAX, TEGRA210_CLK_CLK_MAX, 0, 0 }, }; diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 55b77c576c42..6d9995cbf651 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -1007,14 +1007,16 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) /* first try to find a slot in an existing linked list entry */ for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) { - rsv = __va(prsv); + rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB); index = atomic_fetch_add_unless(&rsv->count, 1, rsv->size); if (index < rsv->size) { rsv->entry[index].base = addr; rsv->entry[index].size = size; + memunmap(rsv); return 0; } + memunmap(rsv); } /* no slot found - allocate a new linked list entry */ @@ -1022,7 +1024,13 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) if (!rsv) return -ENOMEM; - rsv->size = EFI_MEMRESERVE_COUNT(PAGE_SIZE); + /* + * The memremap() call above assumes that a linux_efi_memreserve entry + * never crosses a page boundary, so let's ensure that this remains true + * even when kexec'ing a 4k pages kernel from a >4k pages kernel, by + * using SZ_4K explicitly in the size calculation below. + */ + rsv->size = EFI_MEMRESERVE_COUNT(SZ_4K); atomic_set(&rsv->count, 1); rsv->entry[0].base = addr; rsv->entry[0].size = size; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a67a63b5aa84..0c4a76bca5c6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -280,7 +280,8 @@ struct drm_i915_display_funcs { void (*get_cdclk)(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state); void (*set_cdclk)(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *cdclk_state); + const struct intel_cdclk_state *cdclk_state, + enum pipe pipe); int (*get_fifo_size)(struct drm_i915_private *dev_priv, enum i9xx_plane_id i9xx_plane); int (*compute_pipe_wm)(struct intel_crtc_state *cstate); @@ -1622,6 +1623,8 @@ struct drm_i915_private { struct intel_cdclk_state actual; /* The current hardware cdclk state */ struct intel_cdclk_state hw; + + int force_min_cdclk; } cdclk; /** @@ -1741,6 +1744,7 @@ struct drm_i915_private { * */ struct mutex av_mutex; + int audio_power_refcount; struct { struct mutex mutex; diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 5104c6bbd66f..2f3fd5bf0f11 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -741,15 +741,71 @@ void intel_init_audio_hooks(struct drm_i915_private *dev_priv) } } +static void glk_force_audio_cdclk(struct drm_i915_private *dev_priv, + bool enable) +{ + struct drm_modeset_acquire_ctx ctx; + struct drm_atomic_state *state; + int ret; + + drm_modeset_acquire_init(&ctx, 0); + state = drm_atomic_state_alloc(&dev_priv->drm); + if (WARN_ON(!state)) + return; + + state->acquire_ctx = &ctx; + +retry: + to_intel_atomic_state(state)->cdclk.force_min_cdclk_changed = true; + to_intel_atomic_state(state)->cdclk.force_min_cdclk = + enable ? 2 * 96000 : 0; + + /* + * Protects dev_priv->cdclk.force_min_cdclk + * Need to lock this here in case we have no active pipes + * and thus wouldn't lock it during the commit otherwise. + */ + ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, + &ctx); + if (!ret) + ret = drm_atomic_commit(state); + + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + drm_modeset_backoff(&ctx); + goto retry; + } + + WARN_ON(ret); + + drm_atomic_state_put(state); + + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); +} + static void i915_audio_component_get_power(struct device *kdev) { - intel_display_power_get(kdev_to_i915(kdev), POWER_DOMAIN_AUDIO); + struct drm_i915_private *dev_priv = kdev_to_i915(kdev); + + intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); + + /* Force CDCLK to 2*BCLK as long as we need audio to be powered. */ + if (dev_priv->audio_power_refcount++ == 0) + if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + glk_force_audio_cdclk(dev_priv, true); } static void i915_audio_component_put_power(struct device *kdev) { - intel_display_power_put_unchecked(kdev_to_i915(kdev), - POWER_DOMAIN_AUDIO); + struct drm_i915_private *dev_priv = kdev_to_i915(kdev); + + /* Stop forcing CDCLK to 2*BCLK if no need for audio to be powered. */ + if (--dev_priv->audio_power_refcount == 0) + if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + glk_force_audio_cdclk(dev_priv, false); + + intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_AUDIO); } static void i915_audio_component_codec_wake_override(struct device *kdev, diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 15ba950dee00..00f76261924c 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -516,7 +516,8 @@ static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) } static void vlv_set_cdclk(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *cdclk_state) + const struct intel_cdclk_state *cdclk_state, + enum pipe pipe) { int cdclk = cdclk_state->cdclk; u32 val, cmd = cdclk_state->voltage_level; @@ -598,7 +599,8 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, } static void chv_set_cdclk(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *cdclk_state) + const struct intel_cdclk_state *cdclk_state, + enum pipe pipe) { int cdclk = cdclk_state->cdclk; u32 val, cmd = cdclk_state->voltage_level; @@ -697,7 +699,8 @@ static void bdw_get_cdclk(struct drm_i915_private *dev_priv, } static void bdw_set_cdclk(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *cdclk_state) + const struct intel_cdclk_state *cdclk_state, + enum pipe pipe) { int cdclk = cdclk_state->cdclk; u32 val; @@ -987,7 +990,8 @@ static void skl_dpll0_disable(struct drm_i915_private *dev_priv) } static void skl_set_cdclk(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *cdclk_state) + const struct intel_cdclk_state *cdclk_state, + enum pipe pipe) { int cdclk = cdclk_state->cdclk; int vco = cdclk_state->vco; @@ -1158,7 +1162,7 @@ void skl_init_cdclk(struct drm_i915_private *dev_priv) cdclk_state.cdclk = skl_calc_cdclk(0, cdclk_state.vco); cdclk_state.voltage_level = skl_calc_voltage_level(cdclk_state.cdclk); - skl_set_cdclk(dev_priv, &cdclk_state); + skl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } /** @@ -1176,7 +1180,7 @@ void skl_uninit_cdclk(struct drm_i915_private *dev_priv) cdclk_state.vco = 0; cdclk_state.voltage_level = skl_calc_voltage_level(cdclk_state.cdclk); - skl_set_cdclk(dev_priv, &cdclk_state); + skl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } static int bxt_calc_cdclk(int min_cdclk) @@ -1355,7 +1359,8 @@ static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) } static void bxt_set_cdclk(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *cdclk_state) + const struct intel_cdclk_state *cdclk_state, + enum pipe pipe) { int cdclk = cdclk_state->cdclk; int vco = cdclk_state->vco; @@ -1408,11 +1413,10 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, bxt_de_pll_enable(dev_priv, vco); val = divider | skl_cdclk_decimal(cdclk); - /* - * FIXME if only the cd2x divider needs changing, it could be done - * without shutting off the pipe (if only one pipe is active). - */ - val |= BXT_CDCLK_CD2X_PIPE_NONE; + if (pipe == INVALID_PIPE) + val |= BXT_CDCLK_CD2X_PIPE_NONE; + else + val |= BXT_CDCLK_CD2X_PIPE(pipe); /* * Disable SSA Precharge when CD clock frequency < 500 MHz, * enable otherwise. @@ -1421,6 +1425,9 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; I915_WRITE(CDCLK_CTL, val); + if (pipe != INVALID_PIPE) + intel_wait_for_vblank(dev_priv, pipe); + mutex_lock(&dev_priv->pcu_lock); /* * The timeout isn't specified, the 2ms used here is based on @@ -1525,7 +1532,7 @@ void bxt_init_cdclk(struct drm_i915_private *dev_priv) } cdclk_state.voltage_level = bxt_calc_voltage_level(cdclk_state.cdclk); - bxt_set_cdclk(dev_priv, &cdclk_state); + bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } /** @@ -1543,7 +1550,7 @@ void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) cdclk_state.vco = 0; cdclk_state.voltage_level = bxt_calc_voltage_level(cdclk_state.cdclk); - bxt_set_cdclk(dev_priv, &cdclk_state); + bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } static int cnl_calc_cdclk(int min_cdclk) @@ -1663,7 +1670,8 @@ static void cnl_cdclk_pll_enable(struct drm_i915_private *dev_priv, int vco) } static void cnl_set_cdclk(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *cdclk_state) + const struct intel_cdclk_state *cdclk_state, + enum pipe pipe) { int cdclk = cdclk_state->cdclk; int vco = cdclk_state->vco; @@ -1704,13 +1712,15 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, cnl_cdclk_pll_enable(dev_priv, vco); val = divider | skl_cdclk_decimal(cdclk); - /* - * FIXME if only the cd2x divider needs changing, it could be done - * without shutting off the pipe (if only one pipe is active). - */ - val |= BXT_CDCLK_CD2X_PIPE_NONE; + if (pipe == INVALID_PIPE) + val |= BXT_CDCLK_CD2X_PIPE_NONE; + else + val |= BXT_CDCLK_CD2X_PIPE(pipe); I915_WRITE(CDCLK_CTL, val); + if (pipe != INVALID_PIPE) + intel_wait_for_vblank(dev_priv, pipe); + /* inform PCU of the change */ mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, @@ -1847,7 +1857,8 @@ static int icl_calc_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) } static void icl_set_cdclk(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *cdclk_state) + const struct intel_cdclk_state *cdclk_state, + enum pipe pipe) { unsigned int cdclk = cdclk_state->cdclk; unsigned int vco = cdclk_state->vco; @@ -1872,6 +1883,11 @@ static void icl_set_cdclk(struct drm_i915_private *dev_priv, if (dev_priv->cdclk.hw.vco != vco) cnl_cdclk_pll_enable(dev_priv, vco); + /* + * On ICL CD2X_DIV can only be 1, so we'll never end up changing the + * divider here synchronized to a pipe while CDCLK is on, nor will we + * need the corresponding vblank wait. + */ I915_WRITE(CDCLK_CTL, ICL_CDCLK_CD2X_PIPE_NONE | skl_cdclk_decimal(cdclk)); @@ -2002,7 +2018,7 @@ sanitize: sanitized_state.voltage_level = icl_calc_voltage_level(sanitized_state.cdclk); - icl_set_cdclk(dev_priv, &sanitized_state); + icl_set_cdclk(dev_priv, &sanitized_state, INVALID_PIPE); } /** @@ -2020,7 +2036,7 @@ void icl_uninit_cdclk(struct drm_i915_private *dev_priv) cdclk_state.vco = 0; cdclk_state.voltage_level = icl_calc_voltage_level(cdclk_state.cdclk); - icl_set_cdclk(dev_priv, &cdclk_state); + icl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } /** @@ -2048,7 +2064,7 @@ void cnl_init_cdclk(struct drm_i915_private *dev_priv) cdclk_state.vco = cnl_cdclk_pll_vco(dev_priv, cdclk_state.cdclk); cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk); - cnl_set_cdclk(dev_priv, &cdclk_state); + cnl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } /** @@ -2066,7 +2082,7 @@ void cnl_uninit_cdclk(struct drm_i915_private *dev_priv) cdclk_state.vco = 0; cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk); - cnl_set_cdclk(dev_priv, &cdclk_state); + cnl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } /** @@ -2085,6 +2101,27 @@ bool intel_cdclk_needs_modeset(const struct intel_cdclk_state *a, a->ref != b->ref; } +/** + * intel_cdclk_needs_cd2x_update - Determine if two CDCLK states require a cd2x divider update + * @a: first CDCLK state + * @b: second CDCLK state + * + * Returns: + * True if the CDCLK states require just a cd2x divider update, false if not. + */ +bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b) +{ + /* Older hw doesn't have the capability */ + if (INTEL_GEN(dev_priv) < 10 && !IS_GEN9_LP(dev_priv)) + return false; + + return a->cdclk != b->cdclk && + a->vco == b->vco && + a->ref == b->ref; +} + /** * intel_cdclk_changed - Determine if two CDCLK states are different * @a: first CDCLK state @@ -2100,6 +2137,26 @@ bool intel_cdclk_changed(const struct intel_cdclk_state *a, a->voltage_level != b->voltage_level; } +/** + * intel_cdclk_swap_state - make atomic CDCLK configuration effective + * @state: atomic state + * + * This is the CDCLK version of drm_atomic_helper_swap_state() since the + * helper does not handle driver-specific global state. + * + * Similarly to the atomic helpers this function does a complete swap, + * i.e. it also puts the old state into @state. This is used by the commit + * code to determine how CDCLK has changed (for instance did it increase or + * decrease). + */ +void intel_cdclk_swap_state(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + + swap(state->cdclk.logical, dev_priv->cdclk.logical); + swap(state->cdclk.actual, dev_priv->cdclk.actual); +} + void intel_dump_cdclk_state(const struct intel_cdclk_state *cdclk_state, const char *context) { @@ -2113,12 +2170,14 @@ void intel_dump_cdclk_state(const struct intel_cdclk_state *cdclk_state, * intel_set_cdclk - Push the CDCLK state to the hardware * @dev_priv: i915 device * @cdclk_state: new CDCLK state + * @pipe: pipe with which to synchronize the update * * Program the hardware based on the passed in CDCLK state, * if necessary. */ -void intel_set_cdclk(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *cdclk_state) +static void intel_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state, + enum pipe pipe) { if (!intel_cdclk_changed(&dev_priv->cdclk.hw, cdclk_state)) return; @@ -2128,7 +2187,7 @@ void intel_set_cdclk(struct drm_i915_private *dev_priv, intel_dump_cdclk_state(cdclk_state, "Changing CDCLK to"); - dev_priv->display.set_cdclk(dev_priv, cdclk_state); + dev_priv->display.set_cdclk(dev_priv, cdclk_state, pipe); if (WARN(intel_cdclk_changed(&dev_priv->cdclk.hw, cdclk_state), "cdclk state doesn't match!\n")) { @@ -2137,6 +2196,46 @@ void intel_set_cdclk(struct drm_i915_private *dev_priv, } } +/** + * intel_set_cdclk_pre_plane_update - Push the CDCLK state to the hardware + * @dev_priv: i915 device + * @old_state: old CDCLK state + * @new_state: new CDCLK state + * @pipe: pipe with which to synchronize the update + * + * Program the hardware before updating the HW plane state based on the passed + * in CDCLK state, if necessary. + */ +void +intel_set_cdclk_pre_plane_update(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *old_state, + const struct intel_cdclk_state *new_state, + enum pipe pipe) +{ + if (pipe == INVALID_PIPE || old_state->cdclk <= new_state->cdclk) + intel_set_cdclk(dev_priv, new_state, pipe); +} + +/** + * intel_set_cdclk_post_plane_update - Push the CDCLK state to the hardware + * @dev_priv: i915 device + * @old_state: old CDCLK state + * @new_state: new CDCLK state + * @pipe: pipe with which to synchronize the update + * + * Program the hardware after updating the HW plane state based on the passed + * in CDCLK state, if necessary. + */ +void +intel_set_cdclk_post_plane_update(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *old_state, + const struct intel_cdclk_state *new_state, + enum pipe pipe) +{ + if (pipe != INVALID_PIPE && old_state->cdclk > new_state->cdclk) + intel_set_cdclk(dev_priv, new_state, pipe); +} + static int intel_pixel_rate_to_cdclk(struct drm_i915_private *dev_priv, int pixel_rate) { @@ -2187,19 +2286,8 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) /* * According to BSpec, "The CD clock frequency must be at least twice * the frequency of the Azalia BCLK." and BCLK is 96 MHz by default. - * - * FIXME: Check the actual, not default, BCLK being used. - * - * FIXME: This does not depend on ->has_audio because the higher CDCLK - * is required for audio probe, also when there are no audio capable - * displays connected at probe time. This leads to unnecessarily high - * CDCLK when audio is not required. - * - * FIXME: This limit is only applied when there are displays connected - * at probe time. If we probe without displays, we'll still end up using - * the platform minimum CDCLK, failing audio probe. */ - if (INTEL_GEN(dev_priv) >= 9) + if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9) min_cdclk = max(2 * 96000, min_cdclk); /* @@ -2239,7 +2327,7 @@ static int intel_compute_min_cdclk(struct drm_atomic_state *state) intel_state->min_cdclk[i] = min_cdclk; } - min_cdclk = 0; + min_cdclk = intel_state->cdclk.force_min_cdclk; for_each_pipe(dev_priv, pipe) min_cdclk = max(intel_state->min_cdclk[pipe], min_cdclk); @@ -2300,7 +2388,8 @@ static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) vlv_calc_voltage_level(dev_priv, cdclk); if (!intel_state->active_crtcs) { - cdclk = vlv_calc_cdclk(dev_priv, 0); + cdclk = vlv_calc_cdclk(dev_priv, + intel_state->cdclk.force_min_cdclk); intel_state->cdclk.actual.cdclk = cdclk; intel_state->cdclk.actual.voltage_level = @@ -2333,7 +2422,7 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) bdw_calc_voltage_level(cdclk); if (!intel_state->active_crtcs) { - cdclk = bdw_calc_cdclk(0); + cdclk = bdw_calc_cdclk(intel_state->cdclk.force_min_cdclk); intel_state->cdclk.actual.cdclk = cdclk; intel_state->cdclk.actual.voltage_level = @@ -2405,7 +2494,7 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) skl_calc_voltage_level(cdclk); if (!intel_state->active_crtcs) { - cdclk = skl_calc_cdclk(0, vco); + cdclk = skl_calc_cdclk(intel_state->cdclk.force_min_cdclk, vco); intel_state->cdclk.actual.vco = vco; intel_state->cdclk.actual.cdclk = cdclk; @@ -2444,10 +2533,10 @@ static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) if (!intel_state->active_crtcs) { if (IS_GEMINILAKE(dev_priv)) { - cdclk = glk_calc_cdclk(0); + cdclk = glk_calc_cdclk(intel_state->cdclk.force_min_cdclk); vco = glk_de_pll_vco(dev_priv, cdclk); } else { - cdclk = bxt_calc_cdclk(0); + cdclk = bxt_calc_cdclk(intel_state->cdclk.force_min_cdclk); vco = bxt_de_pll_vco(dev_priv, cdclk); } @@ -2483,7 +2572,7 @@ static int cnl_modeset_calc_cdclk(struct drm_atomic_state *state) cnl_compute_min_voltage_level(intel_state)); if (!intel_state->active_crtcs) { - cdclk = cnl_calc_cdclk(0); + cdclk = cnl_calc_cdclk(intel_state->cdclk.force_min_cdclk); vco = cnl_cdclk_pll_vco(dev_priv, cdclk); intel_state->cdclk.actual.vco = vco; @@ -2519,7 +2608,7 @@ static int icl_modeset_calc_cdclk(struct drm_atomic_state *state) cnl_compute_min_voltage_level(intel_state)); if (!intel_state->active_crtcs) { - cdclk = icl_calc_cdclk(0, ref); + cdclk = icl_calc_cdclk(intel_state->cdclk.force_min_cdclk, ref); vco = icl_calc_cdclk_pll_vco(dev_priv, cdclk); intel_state->cdclk.actual.vco = vco; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index be4024f0e3a8..9803d713f746 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12770,10 +12770,16 @@ static int intel_modeset_checks(struct drm_atomic_state *state) return -EINVAL; } + /* keep the current setting */ + if (!intel_state->cdclk.force_min_cdclk_changed) + intel_state->cdclk.force_min_cdclk = + dev_priv->cdclk.force_min_cdclk; + intel_state->modeset = true; intel_state->active_crtcs = dev_priv->active_crtcs; intel_state->cdclk.logical = dev_priv->cdclk.logical; intel_state->cdclk.actual = dev_priv->cdclk.actual; + intel_state->cdclk.pipe = INVALID_PIPE; for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { if (new_crtc_state->active) @@ -12793,6 +12799,8 @@ static int intel_modeset_checks(struct drm_atomic_state *state) * adjusted_mode bits in the crtc directly. */ if (dev_priv->display.modeset_calc_cdclk) { + enum pipe pipe; + ret = dev_priv->display.modeset_calc_cdclk(state); if (ret < 0) return ret; @@ -12809,12 +12817,36 @@ static int intel_modeset_checks(struct drm_atomic_state *state) return ret; } + if (is_power_of_2(intel_state->active_crtcs)) { + struct drm_crtc *crtc; + struct drm_crtc_state *crtc_state; + + pipe = ilog2(intel_state->active_crtcs); + crtc = &intel_get_crtc_for_pipe(dev_priv, pipe)->base; + crtc_state = drm_atomic_get_new_crtc_state(state, crtc); + if (crtc_state && needs_modeset(crtc_state)) + pipe = INVALID_PIPE; + } else { + pipe = INVALID_PIPE; + } + /* All pipes must be switched off while we change the cdclk. */ - if (intel_cdclk_needs_modeset(&dev_priv->cdclk.actual, - &intel_state->cdclk.actual)) { + if (pipe != INVALID_PIPE && + intel_cdclk_needs_cd2x_update(dev_priv, + &dev_priv->cdclk.actual, + &intel_state->cdclk.actual)) { + ret = intel_lock_all_pipes(state); + if (ret < 0) + return ret; + + intel_state->cdclk.pipe = pipe; + } else if (intel_cdclk_needs_modeset(&dev_priv->cdclk.actual, + &intel_state->cdclk.actual)) { ret = intel_modeset_all_pipes(state); if (ret < 0) return ret; + + intel_state->cdclk.pipe = INVALID_PIPE; } DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n", @@ -12823,8 +12855,6 @@ static int intel_modeset_checks(struct drm_atomic_state *state) DRM_DEBUG_KMS("New voltage level calculated to be logical %u, actual %u\n", intel_state->cdclk.logical.voltage_level, intel_state->cdclk.actual.voltage_level); - } else { - to_intel_atomic_state(state)->cdclk.logical = dev_priv->cdclk.logical; } intel_modeset_clear_plls(state); @@ -12892,7 +12922,7 @@ static int intel_atomic_check(struct drm_device *dev, struct drm_crtc *crtc; struct drm_crtc_state *old_crtc_state, *crtc_state; int ret, i; - bool any_ms = false; + bool any_ms = intel_state->cdclk.force_min_cdclk_changed; /* Catch I915_MODE_FLAG_INHERITED */ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, @@ -13248,7 +13278,10 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (intel_state->modeset) { drm_atomic_helper_update_legacy_modeset_state(state->dev, state); - intel_set_cdclk(dev_priv, &dev_priv->cdclk.actual); + intel_set_cdclk_pre_plane_update(dev_priv, + &intel_state->cdclk.actual, + &dev_priv->cdclk.actual, + intel_state->cdclk.pipe); /* * SKL workaround: bspec recommends we disable the SAGV when we @@ -13277,6 +13310,12 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) /* Now enable the clocks, plane, pipe, and connectors that we set up. */ dev_priv->display.update_crtcs(state); + if (intel_state->modeset) + intel_set_cdclk_post_plane_update(dev_priv, + &intel_state->cdclk.actual, + &dev_priv->cdclk.actual, + intel_state->cdclk.pipe); + /* FIXME: We should call drm_atomic_helper_commit_hw_done() here * already, but still need the state for the delayed optimization. To * fix this: @@ -13478,8 +13517,10 @@ static int intel_atomic_commit(struct drm_device *dev, intel_state->min_voltage_level, sizeof(intel_state->min_voltage_level)); dev_priv->active_crtcs = intel_state->active_crtcs; - dev_priv->cdclk.logical = intel_state->cdclk.logical; - dev_priv->cdclk.actual = intel_state->cdclk.actual; + dev_priv->cdclk.force_min_cdclk = + intel_state->cdclk.force_min_cdclk; + + intel_cdclk_swap_state(intel_state); } drm_atomic_state_get(state); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 18e17b422701..ceaa05dfa0d0 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -480,6 +480,11 @@ struct intel_atomic_state { * state only when all crtc's are DPMS off. */ struct intel_cdclk_state actual; + + int force_min_cdclk; + bool force_min_cdclk_changed; + /* pipe to which cd2x update is synchronized */ + enum pipe pipe; } cdclk; bool dpll_set, modeset; @@ -1590,12 +1595,24 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv); void intel_update_max_cdclk(struct drm_i915_private *dev_priv); void intel_update_cdclk(struct drm_i915_private *dev_priv); void intel_update_rawclk(struct drm_i915_private *dev_priv); +bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b); bool intel_cdclk_needs_modeset(const struct intel_cdclk_state *a, const struct intel_cdclk_state *b); bool intel_cdclk_changed(const struct intel_cdclk_state *a, const struct intel_cdclk_state *b); -void intel_set_cdclk(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *cdclk_state); +void intel_cdclk_swap_state(struct intel_atomic_state *state); +void +intel_set_cdclk_pre_plane_update(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *old_state, + const struct intel_cdclk_state *new_state, + enum pipe pipe); +void +intel_set_cdclk_post_plane_update(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *old_state, + const struct intel_cdclk_state *new_state, + enum pipe pipe); void intel_dump_cdclk_state(const struct intel_cdclk_state *cdclk_state, const char *context); diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 0dce94e3c495..d0b04b0d309f 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -730,8 +730,8 @@ int roce_resolve_route_from_path(struct sa_path_rec *rec, if (rec->roce.route_resolved) return 0; - rdma_gid2ip(&sgid._sockaddr, &rec->sgid); - rdma_gid2ip(&dgid._sockaddr, &rec->dgid); + rdma_gid2ip((struct sockaddr *)&sgid, &rec->sgid); + rdma_gid2ip((struct sockaddr *)&dgid, &rec->dgid); if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family) return -EINVAL; @@ -742,7 +742,7 @@ int roce_resolve_route_from_path(struct sa_path_rec *rec, dev_addr.net = &init_net; dev_addr.sgid_attr = attr; - ret = addr_resolve(&sgid._sockaddr, &dgid._sockaddr, + ret = addr_resolve((struct sockaddr *)&sgid, (struct sockaddr *)&dgid, &dev_addr, false, true, 0); if (ret) return ret; @@ -814,22 +814,22 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, struct rdma_dev_addr dev_addr; struct resolve_cb_context ctx; union { - struct sockaddr _sockaddr; struct sockaddr_in _sockaddr_in; struct sockaddr_in6 _sockaddr_in6; } sgid_addr, dgid_addr; int ret; - rdma_gid2ip(&sgid_addr._sockaddr, sgid); - rdma_gid2ip(&dgid_addr._sockaddr, dgid); + rdma_gid2ip((struct sockaddr *)&sgid_addr, sgid); + rdma_gid2ip((struct sockaddr *)&dgid_addr, dgid); memset(&dev_addr, 0, sizeof(dev_addr)); dev_addr.net = &init_net; dev_addr.sgid_attr = sgid_attr; init_completion(&ctx.comp); - ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr, - &dev_addr, 1000, resolve_cb, true, &ctx); + ret = rdma_resolve_ip((struct sockaddr *)&sgid_addr, + (struct sockaddr *)&dgid_addr, &dev_addr, 1000, + resolve_cb, true, &ctx); if (ret) return ret; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index a7295322efbc..f4500d77f314 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -83,7 +83,6 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, struct iphdr ipv4; const struct ib_global_route *ib_grh; union { - struct sockaddr _sockaddr; struct sockaddr_in _sockaddr_in; struct sockaddr_in6 _sockaddr_in6; } sgid_addr, dgid_addr; @@ -133,9 +132,9 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, ipv4.tot_len = htons(0); ipv4.ttl = ib_grh->hop_limit; ipv4.protocol = nxthdr; - rdma_gid2ip(&sgid_addr._sockaddr, sgid); + rdma_gid2ip((struct sockaddr *)&sgid_addr, sgid); ipv4.saddr = sgid_addr._sockaddr_in.sin_addr.s_addr; - rdma_gid2ip(&dgid_addr._sockaddr, &ib_grh->dgid); + rdma_gid2ip((struct sockaddr*)&dgid_addr, &ib_grh->dgid); ipv4.daddr = dgid_addr._sockaddr_in.sin_addr.s_addr; memcpy((u8 *)ah->av + eth_sz, &ipv4, sizeof(struct iphdr)); } else { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 097e5ab2a19f..94ca0f4a4073 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2499,7 +2499,6 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, u32 vlan_id = 0xFFFF; u8 mac_addr[6], hdr_type; union { - struct sockaddr _sockaddr; struct sockaddr_in _sockaddr_in; struct sockaddr_in6 _sockaddr_in6; } sgid_addr, dgid_addr; @@ -2541,8 +2540,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, hdr_type = rdma_gid_attr_network_type(sgid_attr); if (hdr_type == RDMA_NETWORK_IPV4) { - rdma_gid2ip(&sgid_addr._sockaddr, &sgid_attr->gid); - rdma_gid2ip(&dgid_addr._sockaddr, &grh->dgid); + rdma_gid2ip((struct sockaddr *)&sgid_addr, &sgid_attr->gid); + rdma_gid2ip((struct sockaddr *)&dgid_addr, &grh->dgid); memcpy(&cmd->params.dgid[0], &dgid_addr._sockaddr_in.sin_addr.s_addr, 4); memcpy(&cmd->params.sgid[0], diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index d32268cc1174..f3985469c221 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -388,7 +388,7 @@ static void gic_all_vpes_irq_cpu_online(struct irq_data *d) intr = GIC_HWIRQ_TO_LOCAL(d->hwirq); cd = irq_data_get_irq_chip_data(d); - write_gic_vl_map(intr, cd->map); + write_gic_vl_map(mips_gic_vx_map_reg(intr), cd->map); if (cd->mask) write_gic_vl_smask(BIT(intr)); } @@ -517,7 +517,7 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq, spin_lock_irqsave(&gic_lock, flags); for_each_online_cpu(cpu) { write_gic_vl_other(mips_cm_vp_id(cpu)); - write_gic_vo_map(intr, map); + write_gic_vo_map(mips_gic_vx_map_reg(intr), map); } spin_unlock_irqrestore(&gic_lock, flags); diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c index 352e803f566e..64611633e77c 100644 --- a/drivers/md/dm-init.c +++ b/drivers/md/dm-init.c @@ -140,8 +140,8 @@ static char __init *dm_parse_table_entry(struct dm_device *dev, char *str) return ERR_PTR(-EINVAL); } /* target_args */ - dev->target_args_array[n] = kstrndup(field[3], GFP_KERNEL, - DM_MAX_STR_SIZE); + dev->target_args_array[n] = kstrndup(field[3], DM_MAX_STR_SIZE, + GFP_KERNEL); if (!dev->target_args_array[n]) return ERR_PTR(-ENOMEM); @@ -275,7 +275,7 @@ static int __init dm_init_init(void) DMERR("Argument is too big. Limit is %d\n", DM_MAX_STR_SIZE); return -EINVAL; } - str = kstrndup(create, GFP_KERNEL, DM_MAX_STR_SIZE); + str = kstrndup(create, DM_MAX_STR_SIZE, GFP_KERNEL); if (!str) return -ENOMEM; diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 9ea2b0291f20..e549392e0ea5 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -60,6 +60,7 @@ #define WRITE_LOG_VERSION 1ULL #define WRITE_LOG_MAGIC 0x6a736677736872ULL +#define WRITE_LOG_SUPER_SECTOR 0 /* * The disk format for this is braindead simple. @@ -115,6 +116,7 @@ struct log_writes_c { struct list_head logging_blocks; wait_queue_head_t wait; struct task_struct *log_kthread; + struct completion super_done; }; struct pending_block { @@ -180,6 +182,14 @@ static void log_end_io(struct bio *bio) bio_put(bio); } +static void log_end_super(struct bio *bio) +{ + struct log_writes_c *lc = bio->bi_private; + + complete(&lc->super_done); + log_end_io(bio); +} + /* * Meant to be called if there is an error, it will free all the pages * associated with the block. @@ -215,7 +225,8 @@ static int write_metadata(struct log_writes_c *lc, void *entry, bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; bio_set_dev(bio, lc->logdev->bdev); - bio->bi_end_io = log_end_io; + bio->bi_end_io = (sector == WRITE_LOG_SUPER_SECTOR) ? + log_end_super : log_end_io; bio->bi_private = lc; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); @@ -418,11 +429,18 @@ static int log_super(struct log_writes_c *lc) super.nr_entries = cpu_to_le64(lc->logged_entries); super.sectorsize = cpu_to_le32(lc->sectorsize); - if (write_metadata(lc, &super, sizeof(super), NULL, 0, 0)) { + if (write_metadata(lc, &super, sizeof(super), NULL, 0, + WRITE_LOG_SUPER_SECTOR)) { DMERR("Couldn't write super"); return -1; } + /* + * Super sector should be writen in-order, otherwise the + * nr_entries could be rewritten incorrectly by an old bio. + */ + wait_for_completion_io(&lc->super_done); + return 0; } @@ -531,6 +549,7 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) INIT_LIST_HEAD(&lc->unflushed_blocks); INIT_LIST_HEAD(&lc->logging_blocks); init_waitqueue_head(&lc->wait); + init_completion(&lc->super_done); atomic_set(&lc->io_blocks, 0); atomic_set(&lc->pending_blocks, 0); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f96efa363d34..59e919b92873 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4321,12 +4321,12 @@ void bond_setup(struct net_device *bond_dev) bond_dev->features |= NETIF_F_NETNS_LOCAL; bond_dev->hw_features = BOND_VLAN_FEATURES | - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4; bond_dev->features |= bond_dev->hw_features; + bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; } /* Destroy a bonding device. diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c index 18bc035da850..1fff462a4175 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c @@ -843,9 +843,14 @@ int aq_filters_vlans_update(struct aq_nic_s *aq_nic) return err; if (aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) { - if (hweight < AQ_VLAN_MAX_FILTERS) - err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, true); + if (hweight < AQ_VLAN_MAX_FILTERS && hweight > 0) { + err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, + !(aq_nic->packet_filter & IFF_PROMISC)); + aq_nic->aq_nic_cfg.is_vlan_force_promisc = false; + } else { /* otherwise left in promiscue mode */ + aq_nic->aq_nic_cfg.is_vlan_force_promisc = true; + } } return err; @@ -866,6 +871,7 @@ int aq_filters_vlan_offload_off(struct aq_nic_s *aq_nic) if (unlikely(!aq_hw_ops->hw_filter_vlan_ctrl)) return -EOPNOTSUPP; + aq_nic->aq_nic_cfg.is_vlan_force_promisc = true; err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, false); if (err) return err; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index ff83667410bd..550abfe6973d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -117,6 +117,7 @@ void aq_nic_cfg_start(struct aq_nic_s *self) cfg->link_speed_msk &= cfg->aq_hw_caps->link_speed_msk; cfg->features = cfg->aq_hw_caps->hw_features; + cfg->is_vlan_force_promisc = true; } static int aq_nic_update_link_status(struct aq_nic_s *self) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index 8e34c1e49bf2..65e681be9b5d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -36,6 +36,7 @@ struct aq_nic_cfg_s { u32 flow_control; u32 link_speed_msk; u32 wol; + bool is_vlan_force_promisc; u16 is_mc_list_enabled; u16 mc_list_count; bool is_autoneg; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index ec302fdfec63..a4cc04741115 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -771,8 +771,15 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self, unsigned int packet_filter) { unsigned int i = 0U; + struct aq_nic_cfg_s *cfg = self->aq_nic_cfg; + + hw_atl_rpfl2promiscuous_mode_en_set(self, + IS_FILTER_ENABLED(IFF_PROMISC)); + + hw_atl_rpf_vlan_prom_mode_en_set(self, + IS_FILTER_ENABLED(IFF_PROMISC) || + cfg->is_vlan_force_promisc); - hw_atl_rpfl2promiscuous_mode_en_set(self, IS_FILTER_ENABLED(IFF_PROMISC)); hw_atl_rpfl2multicast_flr_en_set(self, IS_FILTER_ENABLED(IFF_ALLMULTI), 0); @@ -781,13 +788,13 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self, hw_atl_rpfl2broadcast_en_set(self, IS_FILTER_ENABLED(IFF_BROADCAST)); - self->aq_nic_cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST); + cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST); for (i = HW_ATL_B0_MAC_MIN; i < HW_ATL_B0_MAC_MAX; ++i) hw_atl_rpfl2_uc_flr_en_set(self, - (self->aq_nic_cfg->is_mc_list_enabled && - (i <= self->aq_nic_cfg->mc_list_count)) ? - 1U : 0U, i); + (cfg->is_mc_list_enabled && + (i <= cfg->mc_list_count)) ? + 1U : 0U, i); return aq_hw_err_from_flags(self); } @@ -1079,7 +1086,7 @@ static int hw_atl_b0_hw_vlan_set(struct aq_hw_s *self, static int hw_atl_b0_hw_vlan_ctrl(struct aq_hw_s *self, bool enable) { /* set promisc in case of disabing the vland filter */ - hw_atl_rpf_vlan_prom_mode_en_set(self, !!!enable); + hw_atl_rpf_vlan_prom_mode_en_set(self, !enable); return aq_hw_err_from_flags(self); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 8d9cc2157afd..7423262ce590 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -122,7 +122,7 @@ static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec, * programmed with (2^32 – ) */ if (gmac4) - sec = (100000000ULL - sec); + sec = -sec; value = readl(ioaddr + PTP_TCR); if (value & PTP_TCR_TSCTRLSSR) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 635d88d82610..a634054dcb11 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2957,12 +2957,15 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) /* Manage tx mitigation */ tx_q->tx_count_frames += nfrags + 1; - if (priv->tx_coal_frames <= tx_q->tx_count_frames) { + if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) && + !(priv->synopsys_id >= DWMAC_CORE_4_00 && + (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en)) { + stmmac_tx_timer_arm(priv, queue); + } else { + tx_q->tx_count_frames = 0; stmmac_set_tx_ic(priv, desc); priv->xstats.tx_set_ic_bit++; - tx_q->tx_count_frames = 0; - } else { - stmmac_tx_timer_arm(priv, queue); } skb_tx_timestamp(skb); @@ -3176,12 +3179,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) * element in case of no SG. */ tx_q->tx_count_frames += nfrags + 1; - if (priv->tx_coal_frames <= tx_q->tx_count_frames) { + if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) && + !(priv->synopsys_id >= DWMAC_CORE_4_00 && + (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en)) { + stmmac_tx_timer_arm(priv, queue); + } else { + tx_q->tx_count_frames = 0; stmmac_set_tx_ic(priv, desc); priv->xstats.tx_set_ic_bit++; - tx_q->tx_count_frames = 0; - } else { - stmmac_tx_timer_arm(priv, queue); } skb_tx_timestamp(skb); diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 16963f7a88f7..351f25e1fc48 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2135,12 +2135,12 @@ static void team_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; dev->hw_features = TEAM_VLAN_FEATURES | - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4; dev->features |= dev->hw_features; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; } static int team_newlink(struct net *src_net, struct net_device *dev, diff --git a/drivers/net/tun.c b/drivers/net/tun.c index f4c933ac6edf..d1cafb29eca9 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1024,18 +1024,8 @@ static void tun_net_uninit(struct net_device *dev) /* Net device open. */ static int tun_net_open(struct net_device *dev) { - struct tun_struct *tun = netdev_priv(dev); - int i; - netif_tx_start_all_queues(dev); - for (i = 0; i < tun->numqueues; i++) { - struct tun_file *tfile; - - tfile = rtnl_dereference(tun->tfiles[i]); - tfile->socket.sk->sk_write_space(tfile->socket.sk); - } - return 0; } @@ -3636,6 +3626,7 @@ static int tun_device_event(struct notifier_block *unused, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct tun_struct *tun = netdev_priv(dev); + int i; if (dev->rtnl_link_ops != &tun_link_ops) return NOTIFY_DONE; @@ -3645,6 +3636,14 @@ static int tun_device_event(struct notifier_block *unused, if (tun_queue_resize(tun)) return NOTIFY_BAD; break; + case NETDEV_UP: + for (i = 0; i < tun->numqueues; i++) { + struct tun_file *tfile; + + tfile = rtnl_dereference(tun->tfiles[i]); + tfile->socket.sk->sk_write_space(tfile->socket.sk); + } + break; default: break; } diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index d9a6699abe59..e657d8947125 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1412,7 +1412,7 @@ static int qmi_wwan_probe(struct usb_interface *intf, * different. Ignore the current interface if the number of endpoints * equals the number for the diag interface (two). */ - info = (void *)&id->driver_info; + info = (void *)id->driver_info; if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) { if (desc->bNumEndpoints == 2) diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index ecee4b3ff073..377b07b2feeb 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -763,6 +763,7 @@ static int pvscsi_queue_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd struct pvscsi_adapter *adapter = shost_priv(host); struct pvscsi_ctx *ctx; unsigned long flags; + unsigned char op; spin_lock_irqsave(&adapter->hw_lock, flags); @@ -775,13 +776,14 @@ static int pvscsi_queue_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd } cmd->scsi_done = done; + op = cmd->cmnd[0]; dev_dbg(&cmd->device->sdev_gendev, - "queued cmd %p, ctx %p, op=%x\n", cmd, ctx, cmd->cmnd[0]); + "queued cmd %p, ctx %p, op=%x\n", cmd, ctx, op); spin_unlock_irqrestore(&adapter->hw_lock, flags); - pvscsi_kick_io(adapter, cmd->cmnd[0]); + pvscsi_kick_io(adapter, op); return 0; } diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 82a48e830018..e4b59e76afb0 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -856,9 +856,14 @@ err: static int load_flat_shared_library(int id, struct lib_info *libs) { + /* + * This is a fake bprm struct; only the members "buf", "file" and + * "filename" are actually used. + */ struct linux_binprm bprm; int res; char buf[16]; + loff_t pos = 0; memset(&bprm, 0, sizeof(bprm)); @@ -872,25 +877,11 @@ static int load_flat_shared_library(int id, struct lib_info *libs) if (IS_ERR(bprm.file)) return res; - bprm.cred = prepare_exec_creds(); - res = -ENOMEM; - if (!bprm.cred) - goto out; - - /* We don't really care about recalculating credentials at this point - * as we're past the point of no return and are dealing with shared - * libraries. - */ - bprm.called_set_creds = 1; + res = kernel_read(bprm.file, bprm.buf, BINPRM_BUF_SIZE, &pos); - res = prepare_binprm(&bprm); - - if (!res) + if (res >= 0) res = load_flat_file(&bprm, libs, id, NULL); - abort_creds(bprm.cred); - -out: allow_write_access(bprm.file); fput(bprm.file); diff --git a/fs/inode.c b/fs/inode.c index 9a453f3637f8..5bd1dd2e942f 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -349,7 +349,7 @@ EXPORT_SYMBOL(inc_nlink); static void __address_space_init_once(struct address_space *mapping) { - xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT); init_rwsem(&mapping->i_mmap_rwsem); INIT_LIST_HEAD(&mapping->private_list); spin_lock_init(&mapping->private_lock); diff --git a/fs/io_uring.c b/fs/io_uring.c index e82adbf8adc1..b897695c91c0 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -533,6 +533,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, state->cur_req++; } + req->file = NULL; req->ctx = ctx; req->flags = 0; /* one is dropped after submission, the other at completion */ @@ -1684,10 +1685,8 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s, flags = READ_ONCE(s->sqe->flags); fd = READ_ONCE(s->sqe->fd); - if (!io_op_needs_file(s->sqe)) { - req->file = NULL; + if (!io_op_needs_file(s->sqe)) return 0; - } if (flags & IOSQE_FIXED_FILE) { if (unlikely(!ctx->user_files || diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index a809989807d6..19f856f45689 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -18,7 +18,7 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD -static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS; +static unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO; static unsigned int dataserver_retrans; static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 63c6bb1f8c4d..8c286f8228e5 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -355,6 +355,10 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) /* Mark is just getting destroyed or created? */ if (!conn) continue; + if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID)) + continue; + /* Pairs with smp_wmb() in fsnotify_add_mark_list() */ + smp_rmb(); fsid = conn->fsid; if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1])) continue; diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 22acb0a79b53..e9d49191d39e 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -495,10 +495,13 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, conn->type = type; conn->obj = connp; /* Cache fsid of filesystem containing the object */ - if (fsid) + if (fsid) { conn->fsid = *fsid; - else + conn->flags = FSNOTIFY_CONN_FLAG_HAS_FSID; + } else { conn->fsid.val[0] = conn->fsid.val[1] = 0; + conn->flags = 0; + } if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) inode = igrab(fsnotify_conn_inode(conn)); /* @@ -573,7 +576,12 @@ restart: if (err) return err; goto restart; - } else if (fsid && (conn->fsid.val[0] || conn->fsid.val[1]) && + } else if (fsid && !(conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID)) { + conn->fsid = *fsid; + /* Pairs with smp_rmb() in fanotify_get_fsid() */ + smp_wmb(); + conn->flags |= FSNOTIFY_CONN_FLAG_HAS_FSID; + } else if (fsid && (conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID) && (fsid->val[0] != conn->fsid.val[0] || fsid->val[1] != conn->fsid.val[1])) { /* diff --git a/fs/proc/array.c b/fs/proc/array.c index 2edbb657f859..55180501b915 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -462,7 +462,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, * a program is not able to use ptrace(2) in that case. It is * safe because the task has stopped executing permanently. */ - if (permitted && (task->flags & PF_DUMPCORE)) { + if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE))) { if (try_get_task_stack(task)) { eip = KSTK_EIP(task); esp = KSTK_ESP(task); diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index fcb61b4659b3..8666fe7f35d7 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -23,7 +23,9 @@ * * Return: * 0 - On success - * <0 - On error + * -EFAULT - User access resulted in a page fault + * -EAGAIN - Atomic operation was unable to complete due to contention + * -ENOSYS - Operation not supported */ static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr) @@ -85,7 +87,9 @@ out_pagefault_enable: * * Return: * 0 - On success - * <0 - On error + * -EFAULT - User access resulted in a page fault + * -EAGAIN - Atomic operation was unable to complete due to contention + * -ENOSYS - Function not implemented (only if !HAVE_FUTEX_CMPXCHG) */ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a4c644c1c091..ada0246d43ab 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -230,6 +230,12 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_RECVMSG, NULL) + +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_RECVMSG, NULL) + #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \ ({ \ int __ret = 0; \ @@ -319,6 +325,8 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 094b38f2d9a1..0f67cabbbec8 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -292,7 +292,9 @@ typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; */ struct fsnotify_mark_connector { spinlock_t lock; - unsigned int type; /* Type of object [lock] */ + unsigned short type; /* Type of object [lock] */ +#define FSNOTIFY_CONN_FLAG_HAS_FSID 0x01 + unsigned short flags; /* flags [lock] */ __kernel_fsid_t fsid; /* fsid of filesystem containing object */ union { /* Object pointer [lock] */ diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 0e01e6129145..5921599b6dc4 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -265,6 +265,7 @@ enum xa_lock_type { #define XA_FLAGS_TRACK_FREE ((__force gfp_t)4U) #define XA_FLAGS_ZERO_BUSY ((__force gfp_t)8U) #define XA_FLAGS_ALLOC_WRAPPED ((__force gfp_t)16U) +#define XA_FLAGS_ACCOUNT ((__force gfp_t)32U) #define XA_FLAGS_MARK(mark) ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \ (__force unsigned)(mark))) diff --git a/include/net/tls.h b/include/net/tls.h index 053082d98906..a67ad7d56ff2 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -347,21 +347,6 @@ static inline bool tls_is_partially_sent_record(struct tls_context *ctx) return !!ctx->partially_sent_record; } -static inline int tls_complete_pending_work(struct sock *sk, - struct tls_context *ctx, - int flags, long *timeo) -{ - int rc = 0; - - if (unlikely(sk->sk_write_pending)) - rc = wait_on_pending_writer(sk, timeo); - - if (!rc && tls_is_partially_sent_record(ctx)) - rc = tls_push_partial_record(sk, ctx, flags); - - return rc; -} - static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) { return tls_ctx->pending_open_record_frags; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 929c8e537a14..9d01f4788d3e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -187,6 +187,8 @@ enum bpf_attach_type { BPF_CGROUP_UDP6_SENDMSG, BPF_LIRC_MODE2, BPF_FLOW_DISSECTOR, + BPF_CGROUP_UDP4_RECVMSG = 19, + BPF_CGROUP_UDP6_RECVMSG, __MAX_BPF_ATTACH_TYPE }; @@ -3104,8 +3106,8 @@ struct bpf_raw_tracepoint_args { /* DIRECT: Skip the FIB rules and go to FIB table associated with device * OUTPUT: Do lookup from egress perspective; default is ingress */ -#define BPF_FIB_LOOKUP_DIRECT BIT(0) -#define BPF_FIB_LOOKUP_OUTPUT BIT(1) +#define BPF_FIB_LOOKUP_DIRECT (1U << 0) +#define BPF_FIB_LOOKUP_OUTPUT (1U << 1) enum { BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 93a5cbbde421..3b03a7342f3c 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -715,9 +715,14 @@ find_leftmost: * have exact two children, so this function will never return NULL. */ for (node = search_root; node;) { - if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) + if (node->flags & LPM_TREE_NODE_FLAG_IM) { + node = rcu_dereference(node->child[0]); + } else { next_node = node; - node = rcu_dereference(node->child[0]); + node = rcu_dereference(node->child[0]); + if (!node) + node = rcu_dereference(next_node->child[1]); + } } do_copy: next_key->prefixlen = next_node->prefixlen; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index db6e825e2958..29b8f20d500c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1520,6 +1520,8 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type, case BPF_CGROUP_INET6_CONNECT: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UDP4_RECVMSG: + case BPF_CGROUP_UDP6_RECVMSG: return 0; default: return -EINVAL; @@ -1809,6 +1811,8 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_CGROUP_INET6_CONNECT: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UDP4_RECVMSG: + case BPF_CGROUP_UDP6_RECVMSG: ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; break; case BPF_CGROUP_SOCK_OPS: @@ -1891,6 +1895,8 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_CGROUP_INET6_CONNECT: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UDP4_RECVMSG: + case BPF_CGROUP_UDP6_RECVMSG: ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; break; case BPF_CGROUP_SOCK_OPS: @@ -1939,6 +1945,8 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_INET6_CONNECT: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UDP4_RECVMSG: + case BPF_CGROUP_UDP6_RECVMSG: case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_DEVICE: break; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 950fac024fbb..4ff130ddfbf6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5145,9 +5145,12 @@ static int check_return_code(struct bpf_verifier_env *env) struct tnum range = tnum_range(0, 1); switch (env->prog->type) { + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: + if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || + env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG) + range = tnum_range(1, 1); case BPF_PROG_TYPE_CGROUP_SKB: case BPF_PROG_TYPE_CGROUP_SOCK: - case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_CGROUP_DEVICE: break; @@ -5163,16 +5166,17 @@ static int check_return_code(struct bpf_verifier_env *env) } if (!tnum_in(range, reg->var_off)) { + char tn_buf[48]; + verbose(env, "At program exit the register R0 "); if (!tnum_is_unknown(reg->var_off)) { - char tn_buf[48]; - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); verbose(env, "has value %s", tn_buf); } else { verbose(env, "has unknown scalar value"); } - verbose(env, " should have been 0 or 1\n"); + tnum_strn(tn_buf, sizeof(tn_buf), range); + verbose(env, " should have been in %s\n", tn_buf); return -EINVAL; } return 0; diff --git a/kernel/cpu.c b/kernel/cpu.c index 9cc8b6fdb2dc..6170034f4118 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2315,6 +2315,9 @@ static int __init mitigations_parse_cmdline(char *arg) cpu_mitigations = CPU_MITIGATIONS_AUTO; else if (!strcmp(arg, "auto,nosmt")) cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT; + else + pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n", + arg); return 0; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index d64c00afceb5..568a0e839903 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -402,8 +402,6 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = { .arg4_type = ARG_CONST_SIZE, }; -static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd); - static __always_inline u64 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, u64 flags, struct perf_sample_data *sd) @@ -434,24 +432,50 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, return perf_event_output(event, sd, regs); } +/* + * Support executing tracepoints in normal, irq, and nmi context that each call + * bpf_perf_event_output + */ +struct bpf_trace_sample_data { + struct perf_sample_data sds[3]; +}; + +static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds); +static DEFINE_PER_CPU(int, bpf_trace_nest_level); BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, u64, flags, void *, data, u64, size) { - struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd); + struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds); + int nest_level = this_cpu_inc_return(bpf_trace_nest_level); struct perf_raw_record raw = { .frag = { .size = size, .data = data, }, }; + struct perf_sample_data *sd; + int err; - if (unlikely(flags & ~(BPF_F_INDEX_MASK))) - return -EINVAL; + if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) { + err = -EBUSY; + goto out; + } + + sd = &sds->sds[nest_level - 1]; + + if (unlikely(flags & ~(BPF_F_INDEX_MASK))) { + err = -EINVAL; + goto out; + } perf_sample_data_init(sd, 0, 0); sd->raw = &raw; - return __bpf_perf_event_output(regs, map, flags, sd); + err = __bpf_perf_event_output(regs, map, flags, sd); + +out: + this_cpu_dec(bpf_trace_nest_level); + return err; } static const struct bpf_func_proto bpf_perf_event_output_proto = { @@ -808,16 +832,48 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) /* * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp * to avoid potential recursive reuse issue when/if tracepoints are added - * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack + * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack. + * + * Since raw tracepoints run despite bpf_prog_active, support concurrent usage + * in normal, irq, and nmi context. */ -static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs); +struct bpf_raw_tp_regs { + struct pt_regs regs[3]; +}; +static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs); +static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level); +static struct pt_regs *get_bpf_raw_tp_regs(void) +{ + struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs); + int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level); + + if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) { + this_cpu_dec(bpf_raw_tp_nest_level); + return ERR_PTR(-EBUSY); + } + + return &tp_regs->regs[nest_level - 1]; +} + +static void put_bpf_raw_tp_regs(void) +{ + this_cpu_dec(bpf_raw_tp_nest_level); +} + BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args, struct bpf_map *, map, u64, flags, void *, data, u64, size) { - struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs); + struct pt_regs *regs = get_bpf_raw_tp_regs(); + int ret; + + if (IS_ERR(regs)) + return PTR_ERR(regs); perf_fetch_caller_regs(regs); - return ____bpf_perf_event_output(regs, map, flags, data, size); + ret = ____bpf_perf_event_output(regs, map, flags, data, size); + + put_bpf_raw_tp_regs(); + return ret; } static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { @@ -834,12 +890,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args, struct bpf_map *, map, u64, flags) { - struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs); + struct pt_regs *regs = get_bpf_raw_tp_regs(); + int ret; + + if (IS_ERR(regs)) + return PTR_ERR(regs); perf_fetch_caller_regs(regs); /* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */ - return bpf_get_stackid((unsigned long) regs, (unsigned long) map, - flags, 0, 0); + ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map, + flags, 0, 0); + put_bpf_raw_tp_regs(); + return ret; } static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = { @@ -854,11 +916,17 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = { BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args, void *, buf, u32, size, u64, flags) { - struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs); + struct pt_regs *regs = get_bpf_raw_tp_regs(); + int ret; + + if (IS_ERR(regs)) + return PTR_ERR(regs); perf_fetch_caller_regs(regs); - return bpf_get_stack((unsigned long) regs, (unsigned long) buf, - (unsigned long) size, flags, 0); + ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf, + (unsigned long) size, flags, 0); + put_bpf_raw_tp_regs(); + return ret; } static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = { diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 3ea65cdff30d..4ad967453b6f 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -205,8 +205,6 @@ void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect) void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect, int is_constant) { - unsigned long flags = user_access_save(); - /* A constant is always correct */ if (is_constant) { f->constant++; @@ -225,8 +223,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, f->data.correct++; else f->data.incorrect++; - - user_access_restore(flags); } EXPORT_SYMBOL(ftrace_likely_update); diff --git a/lib/xarray.c b/lib/xarray.c index 6be3acbb861f..446b956c9188 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -298,6 +298,8 @@ bool xas_nomem(struct xa_state *xas, gfp_t gfp) xas_destroy(xas); return false; } + if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) + gfp |= __GFP_ACCOUNT; xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); if (!xas->xa_alloc) return false; @@ -325,6 +327,8 @@ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp) xas_destroy(xas); return false; } + if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) + gfp |= __GFP_ACCOUNT; if (gfpflags_allow_blocking(gfp)) { xas_unlock_type(xas, lock_type); xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); @@ -358,8 +362,12 @@ static void *xas_alloc(struct xa_state *xas, unsigned int shift) if (node) { xas->xa_alloc = NULL; } else { - node = kmem_cache_alloc(radix_tree_node_cachep, - GFP_NOWAIT | __GFP_NOWARN); + gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN; + + if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) + gfp |= __GFP_ACCOUNT; + + node = kmem_cache_alloc(radix_tree_node_cachep, gfp); if (!node) { xas_set_err(xas, -ENOMEM); return NULL; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5b4f00be325d..ec81808830ae 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1491,16 +1491,29 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed, /* * Dissolve a given free hugepage into free buddy pages. This function does - * nothing for in-use (including surplus) hugepages. Returns -EBUSY if the - * dissolution fails because a give page is not a free hugepage, or because - * free hugepages are fully reserved. + * nothing for in-use hugepages and non-hugepages. + * This function returns values like below: + * + * -EBUSY: failed to dissolved free hugepages or the hugepage is in-use + * (allocated or reserved.) + * 0: successfully dissolved free hugepages or the page is not a + * hugepage (considered as already dissolved) */ int dissolve_free_huge_page(struct page *page) { int rc = -EBUSY; + /* Not to disrupt normal path by vainly holding hugetlb_lock */ + if (!PageHuge(page)) + return 0; + spin_lock(&hugetlb_lock); - if (PageHuge(page) && !page_count(page)) { + if (!PageHuge(page)) { + rc = 0; + goto out; + } + + if (!page_count(page)) { struct page *head = compound_head(page); struct hstate *h = page_hstate(head); int nid = page_to_nid(head); @@ -1545,11 +1558,9 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn) for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order) { page = pfn_to_page(pfn); - if (PageHuge(page) && !page_count(page)) { - rc = dissolve_free_huge_page(page); - if (rc) - break; - } + rc = dissolve_free_huge_page(page); + if (rc) + break; } return rc; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index fc8b51744579..3a83e279cc98 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1733,6 +1733,8 @@ static int soft_offline_huge_page(struct page *page, int flags) if (!ret) { if (set_hwpoison_free_buddy_page(page)) num_poisoned_pages_inc(); + else + ret = -EBUSY; } } return ret; @@ -1857,11 +1859,8 @@ static int soft_offline_in_use_page(struct page *page, int flags) static int soft_offline_free_page(struct page *page) { - int rc = 0; - struct page *head = compound_head(page); + int rc = dissolve_free_huge_page(page); - if (PageHuge(head)) - rc = dissolve_free_huge_page(page); if (!rc) { if (set_hwpoison_free_buddy_page(page)) num_poisoned_pages_inc(); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 2219e747df49..5b3bf1747c19 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -306,7 +306,7 @@ static void mpol_rebind_nodemask(struct mempolicy *pol, const nodemask_t *nodes) else { nodes_remap(tmp, pol->v.nodes,pol->w.cpuset_mems_allowed, *nodes); - pol->w.cpuset_mems_allowed = tmp; + pol->w.cpuset_mems_allowed = *nodes; } if (nodes_empty(tmp)) diff --git a/mm/page_idle.c b/mm/page_idle.c index 0b39ec0c945c..295512465065 100644 --- a/mm/page_idle.c +++ b/mm/page_idle.c @@ -136,7 +136,7 @@ static ssize_t page_idle_bitmap_read(struct file *file, struct kobject *kobj, end_pfn = pfn + count * BITS_PER_BYTE; if (end_pfn > max_pfn) - end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS); + end_pfn = max_pfn; for (; pfn < end_pfn; pfn++) { bit = pfn % BITMAP_CHUNK_BITS; @@ -181,7 +181,7 @@ static ssize_t page_idle_bitmap_write(struct file *file, struct kobject *kobj, end_pfn = pfn + count * BITS_PER_BYTE; if (end_pfn > max_pfn) - end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS); + end_pfn = max_pfn; for (; pfn < end_pfn; pfn++) { bit = pfn % BITMAP_CHUNK_BITS; diff --git a/mm/page_io.c b/mm/page_io.c index 2e8019d0e048..189415852077 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -29,10 +29,9 @@ static struct bio *get_swap_bio(gfp_t gfp_flags, struct page *page, bio_end_io_t end_io) { - int i, nr = hpage_nr_pages(page); struct bio *bio; - bio = bio_alloc(gfp_flags, nr); + bio = bio_alloc(gfp_flags, 1); if (bio) { struct block_device *bdev; @@ -41,9 +40,7 @@ static struct bio *get_swap_bio(gfp_t gfp_flags, bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9; bio->bi_end_io = end_io; - for (i = 0; i < nr; i++) - bio_add_page(bio, page + i, PAGE_SIZE, 0); - VM_BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE * nr); + bio_add_page(bio, page, PAGE_SIZE * hpage_nr_pages(page), 0); } return bio; } diff --git a/net/core/filter.c b/net/core/filter.c index 27e61ffd9039..b76f14197128 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6420,6 +6420,7 @@ static bool sock_addr_is_valid_access(int off, int size, case BPF_CGROUP_INET4_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_UDP4_SENDMSG: + case BPF_CGROUP_UDP4_RECVMSG: break; default: return false; @@ -6430,6 +6431,7 @@ static bool sock_addr_is_valid_access(int off, int size, case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET6_CONNECT: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UDP6_RECVMSG: break; default: return false; diff --git a/net/core/sock.c b/net/core/sock.c index 067878a1e4c5..30afb072eecf 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1482,9 +1482,6 @@ int sock_getsockopt(struct socket *sock, int level, int optname, { u32 meminfo[SK_MEMINFO_VARS]; - if (get_user(len, optlen)) - return -EFAULT; - sk_get_meminfo(sk, meminfo); len = min_t(unsigned int, len, sizeof(meminfo)); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index dc91c27bb788..3f6e95cb21b0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -201,7 +201,7 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) } sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol, iph->saddr, iph->daddr, - skb->dev->ifindex, sdif); + dif, sdif); } out: read_unlock(&raw_v4_hashinfo.lock); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3b179ce6170f..ee999cb5e670 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -503,7 +503,11 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, struct sock *udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport) { - return __udp4_lib_lookup_skb(skb, sport, dport, &udp_table); + const struct iphdr *iph = ip_hdr(skb); + + return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + inet_sdif(skb), &udp_table, NULL); } EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb); @@ -1783,6 +1787,10 @@ try_again: sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); *addr_len = sizeof(*sin); + + if (cgroup_bpf_enabled) + BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, + (struct sockaddr *)sin); } if (udp_sk(sk)->gro_enabled) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 622eeaf5732b..3a01d3d2f105 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -242,7 +242,7 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), - inet6_sdif(skb), &udp_table, skb); + inet6_sdif(skb), &udp_table, NULL); } EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb); @@ -370,6 +370,10 @@ try_again: inet6_iif(skb)); } *addr_len = sizeof(*sin6); + + if (cgroup_bpf_enabled) + BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, + (struct sockaddr *)sin6); } if (udp_sk(sk)->gro_enabled) @@ -516,7 +520,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, - inet6_iif(skb), inet6_sdif(skb), udptable, skb); + inet6_iif(skb), inet6_sdif(skb), udptable, NULL); if (!sk) { /* No socket for error: try tunnels before discarding */ sk = ERR_PTR(-ENOENT); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 71d5544243d2..21814acb862d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2409,6 +2409,9 @@ static void tpacket_destruct_skb(struct sk_buff *skb) ts = __packet_set_timestamp(po, ph, skb); __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); + + if (!packet_read_pending(&po->tx_ring)) + complete(&po->skb_completion); } sock_wfree(skb); @@ -2593,7 +2596,7 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame, static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct net_device *dev; struct virtio_net_hdr *vnet_hdr = NULL; struct sockcm_cookie sockc; @@ -2608,6 +2611,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) int len_sum = 0; int status = TP_STATUS_AVAILABLE; int hlen, tlen, copylen = 0; + long timeo = 0; mutex_lock(&po->pg_vec_lock); @@ -2654,12 +2658,21 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr) size_max = dev->mtu + reserve + VLAN_HLEN; + reinit_completion(&po->skb_completion); + do { ph = packet_current_frame(po, &po->tx_ring, TP_STATUS_SEND_REQUEST); if (unlikely(ph == NULL)) { - if (need_wait && need_resched()) - schedule(); + if (need_wait && skb) { + timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT); + timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo); + if (timeo <= 0) { + err = !timeo ? -ETIMEDOUT : -ERESTARTSYS; + goto out_put; + } + } + /* check for additional frames */ continue; } @@ -3215,6 +3228,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, sock_init_data(sock, sk); po = pkt_sk(sk); + init_completion(&po->skb_completion); sk->sk_family = PF_PACKET; po->num = proto; po->xmit = dev_queue_xmit; @@ -4327,7 +4341,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, req3->tp_sizeof_priv || req3->tp_feature_req_word) { err = -EINVAL; - goto out; + goto out_free_pg_vec; } } break; @@ -4391,6 +4405,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, prb_shutdown_retire_blk_timer(po, rb_queue); } +out_free_pg_vec: if (pg_vec) free_pg_vec(pg_vec, order, req->tp_block_nr); out: diff --git a/net/packet/internal.h b/net/packet/internal.h index 3bb7c5fb3bff..c70a2794456f 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -128,6 +128,7 @@ struct packet_sock { unsigned int tp_hdrlen; unsigned int tp_reserve; unsigned int tp_tstamp; + struct completion skb_completion; struct net_device __rcu *cached_dev; int (*xmit)(struct sk_buff *skb); struct packet_type prot_hook ____cacheline_aligned_in_smp; diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 0448b68fce74..bcfc81ee153d 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -133,10 +133,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Initialize the bind addr area */ sctp_bind_addr_init(&ep->base.bind_addr, 0); - /* Remember who we are attached to. */ - ep->base.sk = sk; - sock_hold(ep->base.sk); - /* Create the lists of associations. */ INIT_LIST_HEAD(&ep->asocs); @@ -169,6 +165,10 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, ep->prsctp_enable = net->sctp.prsctp_enable; ep->reconf_enable = net->sctp.reconf_enable; + /* Remember who we are attached to. */ + ep->base.sk = sk; + sock_hold(ep->base.sk); + return ep; nomem_shkey: diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 732d4b57411a..a437ee8ae482 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -950,6 +950,8 @@ static int xs_local_send_request(struct rpc_rqst *req) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; + rpc_fraghdr rm = xs_stream_record_marker(xdr); + unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen; int status; int sent = 0; @@ -964,9 +966,7 @@ static int xs_local_send_request(struct rpc_rqst *req) req->rq_xtime = ktime_get(); status = xs_sendpages(transport->sock, NULL, 0, xdr, - transport->xmit.offset, - xs_stream_record_marker(xdr), - &sent); + transport->xmit.offset, rm, &sent); dprintk("RPC: %s(%u) = %d\n", __func__, xdr->len - transport->xmit.offset, status); @@ -976,7 +976,7 @@ static int xs_local_send_request(struct rpc_rqst *req) if (likely(sent > 0) || status == 0) { transport->xmit.offset += sent; req->rq_bytes_sent = transport->xmit.offset; - if (likely(req->rq_bytes_sent >= req->rq_slen)) { + if (likely(req->rq_bytes_sent >= msglen)) { req->rq_xmit_bytes_sent += transport->xmit.offset; transport->xmit.offset = 0; return 0; @@ -1097,6 +1097,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req) struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; + rpc_fraghdr rm = xs_stream_record_marker(xdr); + unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen; bool vm_wait = false; int status; int sent; @@ -1122,9 +1124,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) while (1) { sent = 0; status = xs_sendpages(transport->sock, NULL, 0, xdr, - transport->xmit.offset, - xs_stream_record_marker(xdr), - &sent); + transport->xmit.offset, rm, &sent); dprintk("RPC: xs_tcp_send_request(%u) = %d\n", xdr->len - transport->xmit.offset, status); @@ -1133,7 +1133,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) * reset the count of bytes sent. */ transport->xmit.offset += sent; req->rq_bytes_sent = transport->xmit.offset; - if (likely(req->rq_bytes_sent >= req->rq_slen)) { + if (likely(req->rq_bytes_sent >= msglen)) { req->rq_xmit_bytes_sent += transport->xmit.offset; transport->xmit.offset = 0; return 0; diff --git a/net/tipc/core.c b/net/tipc/core.c index 3ecca3b88bf8..eb0f701f9bf1 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -132,7 +132,7 @@ static int __init tipc_init(void) if (err) goto out_sysctl; - err = register_pernet_subsys(&tipc_net_ops); + err = register_pernet_device(&tipc_net_ops); if (err) goto out_pernet; @@ -140,7 +140,7 @@ static int __init tipc_init(void) if (err) goto out_socket; - err = register_pernet_subsys(&tipc_topsrv_net_ops); + err = register_pernet_device(&tipc_topsrv_net_ops); if (err) goto out_pernet_topsrv; @@ -151,11 +151,11 @@ static int __init tipc_init(void) pr_info("Started in single node mode\n"); return 0; out_bearer: - unregister_pernet_subsys(&tipc_topsrv_net_ops); + unregister_pernet_device(&tipc_topsrv_net_ops); out_pernet_topsrv: tipc_socket_stop(); out_socket: - unregister_pernet_subsys(&tipc_net_ops); + unregister_pernet_device(&tipc_net_ops); out_pernet: tipc_unregister_sysctl(); out_sysctl: @@ -170,9 +170,9 @@ out_netlink: static void __exit tipc_exit(void) { tipc_bearer_cleanup(); - unregister_pernet_subsys(&tipc_topsrv_net_ops); + unregister_pernet_device(&tipc_topsrv_net_ops); tipc_socket_stop(); - unregister_pernet_subsys(&tipc_net_ops); + unregister_pernet_device(&tipc_net_ops); tipc_netlink_stop(); tipc_netlink_compat_stop(); tipc_unregister_sysctl(); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 340a6e7c43a7..8836aebd6180 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -445,7 +445,11 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); if (!string_is_valid(name, len)) return -EINVAL; @@ -537,7 +541,11 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, name = (char *)TLV_DATA(msg->req); - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); if (!string_is_valid(name, len)) return -EINVAL; @@ -815,7 +823,11 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, if (!link) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); if (!string_is_valid(name, len)) return -EINVAL; diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 4d85d71f16e2..c86f136e5962 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -176,7 +176,6 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, goto tx_error; } - skb->dev = rt->dst.dev; ttl = ip4_dst_hoplimit(&rt->dst); udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, dst->ipv4.s_addr, 0, ttl, 0, src->port, @@ -195,10 +194,9 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, if (err) goto tx_error; ttl = ip6_dst_hoplimit(ndst); - err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, - ndst->dev, &src->ipv6, - &dst->ipv6, 0, ttl, 0, src->port, - dst->port, false); + err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL, + &src->ipv6, &dst->ipv6, 0, ttl, 0, + src->port, dst->port, false); #endif } return err; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 478603f43964..f4f632824247 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -279,7 +279,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) goto skip_tx_cleanup; } - if (!tls_complete_pending_work(sk, ctx, 0, &timeo)) + if (unlikely(sk->sk_write_pending) && + !wait_on_pending_writer(sk, &timeo)) tls_handle_open_record(sk, 0); /* We need these for tls_sw_fallback handling of other packets */ diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index 02d7c871862a..006be3963977 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -573,13 +573,13 @@ static void test_lpm_get_next_key(void) /* add one more element (total two) */ key_p->prefixlen = 24; - inet_pton(AF_INET, "192.168.0.0", key_p->data); + inet_pton(AF_INET, "192.168.128.0", key_p->data); assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); memset(key_p, 0, key_size); assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && - key_p->data[1] == 168 && key_p->data[2] == 0); + key_p->data[1] == 168 && key_p->data[2] == 128); memset(next_key_p, 0, key_size); assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); @@ -592,7 +592,7 @@ static void test_lpm_get_next_key(void) /* Add one more element (total three) */ key_p->prefixlen = 24; - inet_pton(AF_INET, "192.168.128.0", key_p->data); + inet_pton(AF_INET, "192.168.0.0", key_p->data); assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); memset(key_p, 0, key_size); @@ -643,6 +643,41 @@ static void test_lpm_get_next_key(void) assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && errno == ENOENT); + /* Add one more element (total five) */ + key_p->prefixlen = 28; + inet_pton(AF_INET, "192.168.1.128", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 28 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 1 && + next_key_p->data[3] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 1); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + /* no exact matching key should return the first one in post order */ key_p->prefixlen = 22; inet_pton(AF_INET, "192.168.1.0", key_p->data);