diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst index f2b3439edcc2c..860fe651d6453 100644 --- a/Documentation/accounting/psi.rst +++ b/Documentation/accounting/psi.rst @@ -92,7 +92,8 @@ Triggers can be set on more than one psi metric and more than one trigger for the same psi metric can be specified. However for each trigger a separate file descriptor is required to be able to poll it separately from others, therefore for each trigger a separate open() syscall should be made even -when opening the same psi interface file. +when opening the same psi interface file. Write operations to a file descriptor +with an already existing psi trigger will fail with EBUSY. Monitors activate only when system enters stall state for the monitored psi metric and deactivates upon exit from the stall state. While system is diff --git a/Makefile b/Makefile index c43133c8a5b1f..9f328bfcaf97d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 96 +SUBLEVEL = 97 EXTRAVERSION = NAME = Dare mighty things diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 13e10b970ac83..0eb41dce55da3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1285,6 +1285,7 @@ struct kvm_x86_ops { }; struct kvm_x86_nested_ops { + void (*leave_nested)(struct kvm_vcpu *vcpu); int (*check_events)(struct kvm_vcpu *vcpu); bool (*hv_timer_pending)(struct kvm_vcpu *vcpu); int (*get_state)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index 2577d78757810..886d4648c9dd4 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -486,6 +486,8 @@ static void intel_ppin_init(struct cpuinfo_x86 *c) case INTEL_FAM6_BROADWELL_X: case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_ICELAKE_D: + case INTEL_FAM6_SAPPHIRERAPIDS_X: case INTEL_FAM6_XEON_PHI_KNL: case INTEL_FAM6_XEON_PHI_KNM: diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index f0946872f5e6d..23910e6a3f011 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -783,8 +783,10 @@ void svm_free_nested(struct vcpu_svm *svm) /* * Forcibly leave nested mode in order to be able to reset the VCPU later on. */ -void svm_leave_nested(struct vcpu_svm *svm) +void svm_leave_nested(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); + if (is_guest_mode(&svm->vcpu)) { struct vmcb *hsave = svm->nested.hsave; struct vmcb *vmcb = svm->vmcb; @@ -1185,7 +1187,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, return -EINVAL; if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) { - svm_leave_nested(svm); + svm_leave_nested(vcpu); svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); return 0; } @@ -1238,6 +1240,9 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, copy_vmcb_control_area(&hsave->control, &svm->vmcb->control); hsave->save = *save; + if (is_guest_mode(vcpu)) + svm_leave_nested(vcpu); + svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa; load_nested_vmcb_control(svm, ctl); nested_prepare_vmcb_control(svm); @@ -1252,6 +1257,7 @@ out_free: } struct kvm_x86_nested_ops svm_nested_ops = { + .leave_nested = svm_leave_nested, .check_events = svm_check_nested_events, .get_nested_state_pages = svm_get_nested_state_pages, .get_state = svm_get_nested_state, diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 2e6332af98aba..fa543c355fbdb 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -279,7 +279,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) { if (!(efer & EFER_SVME)) { - svm_leave_nested(svm); + svm_leave_nested(vcpu); svm_set_gif(svm, true); /* diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index be74e22b82ea7..2c007241fbf53 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -393,7 +393,7 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm) int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, struct vmcb *nested_vmcb); -void svm_leave_nested(struct vcpu_svm *svm); +void svm_leave_nested(struct kvm_vcpu *vcpu); void svm_free_nested(struct vcpu_svm *svm); int svm_allocate_nested(struct vcpu_svm *svm); int nested_svm_vmrun(struct vcpu_svm *svm); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 36661b15c3d04..0c2389d0fdafe 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -6628,6 +6628,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) } struct kvm_x86_nested_ops vmx_nested_ops = { + .leave_nested = vmx_leave_nested, .check_events = vmx_check_nested_events, .hv_timer_pending = nested_vmx_preemption_timer_pending, .get_state = vmx_get_nested_state, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7871b8e84b368..a5d6d79b023bc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4391,6 +4391,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.hflags |= HF_SMM_MASK; else vcpu->arch.hflags &= ~HF_SMM_MASK; + + kvm_x86_ops.nested_ops->leave_nested(vcpu); kvm_smm_changed(vcpu); } diff --git a/drivers/bus/simple-pm-bus.c b/drivers/bus/simple-pm-bus.c index 244b8f3b38b40..c5eb46cbf388b 100644 --- a/drivers/bus/simple-pm-bus.c +++ b/drivers/bus/simple-pm-bus.c @@ -16,33 +16,7 @@ static int simple_pm_bus_probe(struct platform_device *pdev) { - const struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; - const struct of_device_id *match; - - /* - * Allow user to use driver_override to bind this driver to a - * transparent bus device which has a different compatible string - * that's not listed in simple_pm_bus_of_match. We don't want to do any - * of the simple-pm-bus tasks for these devices, so return early. - */ - if (pdev->driver_override) - return 0; - - match = of_match_device(dev->driver->of_match_table, dev); - /* - * These are transparent bus devices (not simple-pm-bus matches) that - * have their child nodes populated automatically. So, don't need to - * do anything more. We only match with the device if this driver is - * the most specific match because we don't want to incorrectly bind to - * a device that has a more specific driver. - */ - if (match && match->data) { - if (of_property_match_string(np, "compatible", match->compatible) == 0) - return 0; - else - return -ENODEV; - } + struct device_node *np = pdev->dev.of_node; dev_dbg(&pdev->dev, "%s\n", __func__); @@ -56,25 +30,14 @@ static int simple_pm_bus_probe(struct platform_device *pdev) static int simple_pm_bus_remove(struct platform_device *pdev) { - const void *data = of_device_get_match_data(&pdev->dev); - - if (pdev->driver_override || data) - return 0; - dev_dbg(&pdev->dev, "%s\n", __func__); pm_runtime_disable(&pdev->dev); return 0; } -#define ONLY_BUS ((void *) 1) /* Match if the device is only a bus. */ - static const struct of_device_id simple_pm_bus_of_match[] = { { .compatible = "simple-pm-bus", }, - { .compatible = "simple-bus", .data = ONLY_BUS }, - { .compatible = "simple-mfd", .data = ONLY_BUS }, - { .compatible = "isa", .data = ONLY_BUS }, - { .compatible = "arm,amba-bus", .data = ONLY_BUS }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, simple_pm_bus_of_match); diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 9392de2679a1d..8eac7dc637b0f 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -1402,18 +1402,18 @@ static int vc4_hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable) u32 val; int ret; - ret = pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev); - if (ret) - return ret; + if (enable) { + ret = pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev); + if (ret) + return ret; - val = HDMI_READ(HDMI_CEC_CNTRL_5); - val &= ~(VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET | - VC4_HDMI_CEC_CNT_TO_4700_US_MASK | - VC4_HDMI_CEC_CNT_TO_4500_US_MASK); - val |= ((4700 / usecs) << VC4_HDMI_CEC_CNT_TO_4700_US_SHIFT) | - ((4500 / usecs) << VC4_HDMI_CEC_CNT_TO_4500_US_SHIFT); + val = HDMI_READ(HDMI_CEC_CNTRL_5); + val &= ~(VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET | + VC4_HDMI_CEC_CNT_TO_4700_US_MASK | + VC4_HDMI_CEC_CNT_TO_4500_US_MASK); + val |= ((4700 / usecs) << VC4_HDMI_CEC_CNT_TO_4700_US_SHIFT) | + ((4500 / usecs) << VC4_HDMI_CEC_CNT_TO_4500_US_SHIFT); - if (enable) { HDMI_WRITE(HDMI_CEC_CNTRL_5, val | VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET); HDMI_WRITE(HDMI_CEC_CNTRL_5, val); @@ -1439,7 +1439,10 @@ static int vc4_hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable) HDMI_WRITE(HDMI_CEC_CPU_MASK_SET, VC4_HDMI_CPU_CEC); HDMI_WRITE(HDMI_CEC_CNTRL_5, val | VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET); + + pm_runtime_put(&vc4_hdmi->pdev->dev); } + return 0; } @@ -1531,8 +1534,6 @@ static int vc4_hdmi_cec_init(struct vc4_hdmi *vc4_hdmi) if (ret < 0) goto err_delete_cec_adap; - pm_runtime_put(&vc4_hdmi->pdev->dev); - return 0; err_delete_cec_adap: diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 395eb0b526802..a816b30bca04c 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -721,7 +721,9 @@ static void xgbe_stop_timers(struct xgbe_prv_data *pdata) if (!channel->tx_ring) break; + /* Deactivate the Tx timer */ del_timer_sync(&channel->tx_timer); + channel->tx_timer_active = 0; } } @@ -2557,6 +2559,14 @@ read_again: buf2_len = xgbe_rx_buf2_len(rdata, packet, len); len += buf2_len; + if (buf2_len > rdata->rx.buf.dma_len) { + /* Hardware inconsistency within the descriptors + * that has resulted in a length underflow. + */ + error = 1; + goto skip_data; + } + if (!skb) { skb = xgbe_create_skb(pdata, napi, rdata, buf1_len); @@ -2586,8 +2596,10 @@ skip_data: if (!last || context_next) goto read_again; - if (!skb) + if (!skb || error) { + dev_kfree_skb(skb); goto next_packet; + } /* Be sure we don't exceed the configured MTU */ max_len = netdev->mtu + ETH_HLEN; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c index 9c076aa20306a..b6f5c1bcdbcd4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c @@ -183,18 +183,7 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev) { - struct mlx5e_rep_priv *rpriv; - struct mlx5e_priv *priv; - - /* A given netdev is not a representor or not a slave of LAG configuration */ - if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev)) - return false; - - priv = netdev_priv(netdev); - rpriv = priv->ppriv; - - /* Egress acl forward to vport is supported only non-uplink representor */ - return rpriv->rep->vport != MLX5_VPORT_UPLINK; + return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev); } static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr) @@ -210,9 +199,6 @@ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *pt u16 fwd_vport_num; int err; - if (!mlx5e_rep_is_lag_netdev(netdev)) - return; - info = ptr; lag_info = info->lower_state_info; /* This is not an event of a representor becoming active slave */ @@ -266,9 +252,6 @@ static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr) struct net_device *lag_dev; struct mlx5e_priv *priv; - if (!mlx5e_rep_is_lag_netdev(netdev)) - return; - priv = netdev_priv(netdev); rpriv = priv->ppriv; lag_dev = info->upper_dev; @@ -293,6 +276,19 @@ static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + struct mlx5e_rep_priv *rpriv; + struct mlx5e_rep_bond *bond; + struct mlx5e_priv *priv; + + if (!mlx5e_rep_is_lag_netdev(netdev)) + return NOTIFY_DONE; + + bond = container_of(nb, struct mlx5e_rep_bond, nb); + priv = netdev_priv(netdev); + rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH); + /* Verify VF representor is on the same device of the bond handling the netevent. */ + if (rpriv->uplink_priv.bond != bond) + return NOTIFY_DONE; switch (event) { case NETDEV_CHANGELOWERSTATE: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index ee710ce007950..9b472e793ee36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -131,7 +131,7 @@ static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; - del_timer(&fw_reset->timer); + del_timer_sync(&fw_reset->timer); } static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index 947f346bdc2d6..77c6287c90d55 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -292,7 +292,7 @@ static int create_chain_restore(struct fs_chain *chain) { struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch; - char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)]; + u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; struct mlx5_fs_chains *chains = chain->chains; enum mlx5e_tc_attr_to_reg chain_to_reg; struct mlx5_modify_hdr *mod_hdr; diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index a37aae00e128f..621648ce750b7 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -901,27 +901,35 @@ static void ipa_endpoint_replenish(struct ipa_endpoint *endpoint, u32 count) struct gsi *gsi; u32 backlog; - if (!endpoint->replenish_enabled) { + if (!test_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags)) { if (count) atomic_add(count, &endpoint->replenish_saved); return; } + /* If already active, just update the backlog */ + if (test_and_set_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags)) { + if (count) + atomic_add(count, &endpoint->replenish_backlog); + return; + } while (atomic_dec_not_zero(&endpoint->replenish_backlog)) if (ipa_endpoint_replenish_one(endpoint)) goto try_again_later; + + clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); + if (count) atomic_add(count, &endpoint->replenish_backlog); return; try_again_later: - /* The last one didn't succeed, so fix the backlog */ - backlog = atomic_inc_return(&endpoint->replenish_backlog); + clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); - if (count) - atomic_add(count, &endpoint->replenish_backlog); + /* The last one didn't succeed, so fix the backlog */ + backlog = atomic_add_return(count + 1, &endpoint->replenish_backlog); /* Whenever a receive buffer transaction completes we'll try to * replenish again. It's unlikely, but if we fail to supply even @@ -941,7 +949,7 @@ static void ipa_endpoint_replenish_enable(struct ipa_endpoint *endpoint) u32 max_backlog; u32 saved; - endpoint->replenish_enabled = true; + set_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); while ((saved = atomic_xchg(&endpoint->replenish_saved, 0))) atomic_add(saved, &endpoint->replenish_backlog); @@ -955,7 +963,7 @@ static void ipa_endpoint_replenish_disable(struct ipa_endpoint *endpoint) { u32 backlog; - endpoint->replenish_enabled = false; + clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); while ((backlog = atomic_xchg(&endpoint->replenish_backlog, 0))) atomic_add(backlog, &endpoint->replenish_saved); } @@ -1472,7 +1480,8 @@ static void ipa_endpoint_setup_one(struct ipa_endpoint *endpoint) /* RX transactions require a single TRE, so the maximum * backlog is the same as the maximum outstanding TREs. */ - endpoint->replenish_enabled = false; + clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); + clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); atomic_set(&endpoint->replenish_saved, gsi_channel_tre_max(gsi, endpoint->channel_id)); atomic_set(&endpoint->replenish_backlog, 0); diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h index 58a245de488e8..823c4a1296587 100644 --- a/drivers/net/ipa/ipa_endpoint.h +++ b/drivers/net/ipa/ipa_endpoint.h @@ -39,6 +39,19 @@ enum ipa_endpoint_name { #define IPA_ENDPOINT_MAX 32 /* Max supported by driver */ +/** + * enum ipa_replenish_flag: RX buffer replenish flags + * + * @IPA_REPLENISH_ENABLED: Whether receive buffer replenishing is enabled + * @IPA_REPLENISH_ACTIVE: Whether replenishing is underway + * @IPA_REPLENISH_COUNT: Number of defined replenish flags + */ +enum ipa_replenish_flag { + IPA_REPLENISH_ENABLED, + IPA_REPLENISH_ACTIVE, + IPA_REPLENISH_COUNT, /* Number of flags (must be last) */ +}; + /** * struct ipa_endpoint - IPA endpoint information * @channel_id: EP's GSI channel @@ -60,7 +73,7 @@ struct ipa_endpoint { struct net_device *netdev; /* Receive buffer replenishing for RX endpoints */ - bool replenish_enabled; + DECLARE_BITMAP(replenish_flags, IPA_REPLENISH_COUNT); u32 replenish_ready; atomic_t replenish_saved; atomic_t replenish_backlog; diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 207e59e74935a..06d9f19ca142a 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -121,7 +121,7 @@ static int ipheth_alloc_urbs(struct ipheth_device *iphone) if (tx_buf == NULL) goto free_rx_urb; - rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE, + rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, GFP_KERNEL, &rx_urb->transfer_dma); if (rx_buf == NULL) goto free_tx_buf; @@ -146,7 +146,7 @@ error_nomem: static void ipheth_free_urbs(struct ipheth_device *iphone) { - usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->rx_buf, + usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, iphone->rx_buf, iphone->rx_urb->transfer_dma); usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->tx_buf, iphone->tx_urb->transfer_dma); @@ -317,7 +317,7 @@ static int ipheth_rx_submit(struct ipheth_device *dev, gfp_t mem_flags) usb_fill_bulk_urb(dev->rx_urb, udev, usb_rcvbulkpipe(udev, dev->bulk_in), - dev->rx_buf, IPHETH_BUF_SIZE, + dev->rx_buf, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, ipheth_rcvbulk_callback, dev); dev->rx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 90da17c6da664..30708af975adc 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -642,6 +642,8 @@ read_status: */ if (ctrl->power_fault_detected) status &= ~PCI_EXP_SLTSTA_PFD; + else if (status & PCI_EXP_SLTSTA_PFD) + ctrl->power_fault_detected = true; events |= status; if (!events) { @@ -651,7 +653,7 @@ read_status: } if (status) { - pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, events); + pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, status); /* * In MSI mode, all event bits must be zero before the port @@ -725,8 +727,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) } /* Check Power Fault Detected */ - if ((events & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) { - ctrl->power_fault_detected = 1; + if (events & PCI_EXP_SLTSTA_PFD) { ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(ctrl)); pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF, PCI_EXP_SLTCTL_ATTN_IND_ON); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 086b6bacbad17..18e014fa06480 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -366,9 +366,6 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, if (fanotify_is_perm_event(event->mask)) FANOTIFY_PERM(event)->fd = fd; - if (f) - fd_install(fd, f); - /* Event info records order is: dir fid + name, child fid */ if (fanotify_event_dir_fh_len(event)) { info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME : @@ -432,6 +429,9 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, count -= ret; } + if (f) + fd_install(fd, f); + return metadata.event_len; out_close_fd: diff --git a/include/linux/psi.h b/include/linux/psi.h index 7361023f3fdd5..db4ecfaab8792 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -33,7 +33,7 @@ void cgroup_move_task(struct task_struct *p, struct css_set *to); struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, size_t nbytes, enum psi_res res); -void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t); +void psi_trigger_destroy(struct psi_trigger *t); __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait); diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index b95f3211566a2..17d74f62c1818 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -128,9 +128,6 @@ struct psi_trigger { * events to one per window */ u64 last_event_time; - - /* Refcounting to prevent premature destruction */ - struct kref refcount; }; struct psi_group { diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 7f71b54c06c5f..69fba563c810e 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -545,6 +545,14 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); + /* + * Release agent gets called with all capabilities, + * require capabilities to set release agent. + */ + if ((of->file->f_cred->user_ns != &init_user_ns) || + !capable(CAP_SYS_ADMIN)) + return -EPERM; + cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) return -ENODEV; @@ -958,6 +966,12 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) /* Specifying two release agents is forbidden */ if (ctx->release_agent) return invalfc(fc, "release_agent respecified"); + /* + * Release agent gets called with all capabilities, + * require capabilities to set release agent. + */ + if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN)) + return invalfc(fc, "Setting release_agent not allowed"); ctx->release_agent = param->string; param->string = NULL; break; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index a86857edaa571..4927289a91a97 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3601,6 +3601,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, cgroup_get(cgrp); cgroup_kn_unlock(of->kn); + /* Allow only one trigger per file descriptor */ + if (of->priv) { + cgroup_put(cgrp); + return -EBUSY; + } + psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; new = psi_trigger_create(psi, buf, nbytes, res); if (IS_ERR(new)) { @@ -3608,8 +3614,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, return PTR_ERR(new); } - psi_trigger_replace(&of->priv, new); - + smp_store_release(&of->priv, new); cgroup_put(cgrp); return nbytes; @@ -3644,7 +3649,7 @@ static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of, static void cgroup_pressure_release(struct kernfs_open_file *of) { - psi_trigger_replace(&of->priv, NULL); + psi_trigger_destroy(of->priv); } #endif /* CONFIG_PSI */ diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 1999fcec45c71..7c7758a9e2c24 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1566,8 +1566,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, * Make sure that subparts_cpus is a subset of cpus_allowed. */ if (cs->nr_subparts_cpus) { - cpumask_andnot(cs->subparts_cpus, cs->subparts_cpus, - cs->cpus_allowed); + cpumask_and(cs->subparts_cpus, cs->subparts_cpus, cs->cpus_allowed); cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus); } spin_unlock_irq(&callback_lock); diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index d50a31ecedeec..b7f38f3ad42a2 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1116,7 +1116,6 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, t->event = 0; t->last_event_time = 0; init_waitqueue_head(&t->event_wait); - kref_init(&t->refcount); mutex_lock(&group->trigger_lock); @@ -1145,15 +1144,19 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, return t; } -static void psi_trigger_destroy(struct kref *ref) +void psi_trigger_destroy(struct psi_trigger *t) { - struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount); - struct psi_group *group = t->group; + struct psi_group *group; struct task_struct *task_to_destroy = NULL; - if (static_branch_likely(&psi_disabled)) + /* + * We do not check psi_disabled since it might have been disabled after + * the trigger got created. + */ + if (!t) return; + group = t->group; /* * Wakeup waiters to stop polling. Can happen if cgroup is deleted * from under a polling process. @@ -1189,9 +1192,9 @@ static void psi_trigger_destroy(struct kref *ref) mutex_unlock(&group->trigger_lock); /* - * Wait for both *trigger_ptr from psi_trigger_replace and - * poll_task RCUs to complete their read-side critical sections - * before destroying the trigger and optionally the poll_task + * Wait for psi_schedule_poll_work RCU to complete its read-side + * critical section before destroying the trigger and optionally the + * poll_task. */ synchronize_rcu(); /* @@ -1208,18 +1211,6 @@ static void psi_trigger_destroy(struct kref *ref) kfree(t); } -void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new) -{ - struct psi_trigger *old = *trigger_ptr; - - if (static_branch_likely(&psi_disabled)) - return; - - rcu_assign_pointer(*trigger_ptr, new); - if (old) - kref_put(&old->refcount, psi_trigger_destroy); -} - __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait) { @@ -1229,24 +1220,15 @@ __poll_t psi_trigger_poll(void **trigger_ptr, if (static_branch_likely(&psi_disabled)) return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; - rcu_read_lock(); - - t = rcu_dereference(*(void __rcu __force **)trigger_ptr); - if (!t) { - rcu_read_unlock(); + t = smp_load_acquire(trigger_ptr); + if (!t) return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; - } - kref_get(&t->refcount); - - rcu_read_unlock(); poll_wait(file, &t->event_wait, wait); if (cmpxchg(&t->event, 1, 0) == 1) ret |= EPOLLPRI; - kref_put(&t->refcount, psi_trigger_destroy); - return ret; } @@ -1270,14 +1252,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, buf[buf_size - 1] = '\0'; - new = psi_trigger_create(&psi_system, buf, nbytes, res); - if (IS_ERR(new)) - return PTR_ERR(new); - seq = file->private_data; + /* Take seq->lock to protect seq->private from concurrent writes */ mutex_lock(&seq->lock); - psi_trigger_replace(&seq->private, new); + + /* Allow only one trigger per file descriptor */ + if (seq->private) { + mutex_unlock(&seq->lock); + return -EBUSY; + } + + new = psi_trigger_create(&psi_system, buf, nbytes, res); + if (IS_ERR(new)) { + mutex_unlock(&seq->lock); + return PTR_ERR(new); + } + + smp_store_release(&seq->private, new); mutex_unlock(&seq->lock); return nbytes; @@ -1312,7 +1304,7 @@ static int psi_fop_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; - psi_trigger_replace(&seq->private, NULL); + psi_trigger_destroy(seq->private); return single_release(inode, file); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 27ffa83ffeb3c..373564bf57acb 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3238,8 +3238,8 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; unsigned char name_assign_type = NET_NAME_USER; struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; - const struct rtnl_link_ops *m_ops = NULL; - struct net_device *master_dev = NULL; + const struct rtnl_link_ops *m_ops; + struct net_device *master_dev; struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; struct nlattr *tb[IFLA_MAX + 1]; @@ -3277,6 +3277,8 @@ replay: else dev = NULL; + master_dev = NULL; + m_ops = NULL; if (dev) { master_dev = netdev_master_upper_dev_get(dev); if (master_dev) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 991e3434957b8..12dd08af12b5e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1620,6 +1620,8 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, (mss != tcp_skb_seglen(skb))) goto out; + if (!tcp_skb_can_collapse(prev, skb)) + goto out; len = skb->len; pcount = tcp_skb_pcount(skb); if (tcp_skb_shift(prev, skb, pcount, len)) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6ef035494f30d..a31334b92be7e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1750,7 +1750,10 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) err = -ENOSPC; if (refcount_read(&match->sk_ref) < match->max_num_members) { __dev_remove_pack(&po->prot_hook); - po->fanout = match; + + /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */ + WRITE_ONCE(po->fanout, match); + po->rollover = rollover; rollover = NULL; refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); @@ -3906,7 +3909,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, } case PACKET_FANOUT_DATA: { - if (!po->fanout) + /* Paired with the WRITE_ONCE() in fanout_add() */ + if (!READ_ONCE(po->fanout)) return -EINVAL; return fanout_set_data(po, optval, optlen); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index cb1331b357451..7993a692c7fda 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1954,9 +1954,9 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, bool prio_allocate; u32 parent; u32 chain_index; - struct Qdisc *q = NULL; + struct Qdisc *q; struct tcf_chain_info chain_info; - struct tcf_chain *chain = NULL; + struct tcf_chain *chain; struct tcf_block *block; struct tcf_proto *tp; unsigned long cl; @@ -1984,6 +1984,8 @@ replay: tp = NULL; cl = 0; block = NULL; + q = NULL; + chain = NULL; if (prio == 0) { /* If no priority is provided by the user, @@ -2804,8 +2806,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, struct tcmsg *t; u32 parent; u32 chain_index; - struct Qdisc *q = NULL; - struct tcf_chain *chain = NULL; + struct Qdisc *q; + struct tcf_chain *chain; struct tcf_block *block; unsigned long cl; int err; @@ -2815,6 +2817,7 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, return -EPERM; replay: + q = NULL; err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0)