diff --git a/Makefile b/Makefile
index ffc9b4e3867e..81b0e99dce80 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 4
 PATCHLEVEL = 14
-SUBLEVEL = 58
+SUBLEVEL = 59
 EXTRAVERSION =
 NAME = Petit Gorille
 
@@ -642,6 +642,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,)
 KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation)
 KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
 KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context)
+KBUILD_CFLAGS += $(call cc-disable-warning, attribute-alias)
 
 ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
 KBUILD_CFLAGS += $(call cc-option,-Oz,-Os)
diff --git a/arch/mips/ath79/common.c b/arch/mips/ath79/common.c
index 10a405d593df..c782b10ddf50 100644
--- a/arch/mips/ath79/common.c
+++ b/arch/mips/ath79/common.c
@@ -58,7 +58,7 @@ EXPORT_SYMBOL_GPL(ath79_ddr_ctrl_init);
 
 void ath79_ddr_wb_flush(u32 reg)
 {
-	void __iomem *flush_reg = ath79_ddr_wb_flush_base + reg;
+	void __iomem *flush_reg = ath79_ddr_wb_flush_base + (reg * 4);
 
 	/* Flush the DDR write buffer. */
 	__raw_writel(0x1, flush_reg);
diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index 9632436d74d7..c2e94cf5ecda 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -54,5 +54,5 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar,
 	phys_addr_t size = resource_size(rsrc);
 
 	*start = fixup_bigphys_addr(rsrc->start, size);
-	*end = rsrc->start + size;
+	*end = rsrc->start + size - 1;
 }
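The mips/pci hunk above fixes an off-by-one: a region of `size` bytes starting at `start` ends at `start + size - 1`, not `start + size`. A minimal standalone sketch of that invariant (illustrative only; `res_end` is a hypothetical helper, not kernel code):

#include <assert.h>
#include <stdint.h>

/* Inclusive end address of a region of `size` bytes at `start`. */
static uint64_t res_end(uint64_t start, uint64_t size)
{
	return start + size - 1;
}

int main(void)
{
	/* A 0x1000-byte BAR at 0x10000000 spans 0x10000000..0x10000fff. */
	assert(res_end(0x10000000, 0x1000) == 0x10000fff);
	return 0;
}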
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 44fdf4786638..6f67ff5a5267 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -35,9 +35,9 @@ extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
 extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
 		unsigned long ua, unsigned long entries);
 extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
-		unsigned long ua, unsigned long *hpa);
+		unsigned long ua, unsigned int pageshift, unsigned long *hpa);
 extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
-		unsigned long ua, unsigned long *hpa);
+		unsigned long ua, unsigned int pageshift, unsigned long *hpa);
 extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
 extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
 #endif
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 4dffa611376d..e14cec6bc339 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -433,7 +433,7 @@ long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
 		/* This only handles v2 IOMMU type, v1 is handled via ioctl() */
 		return H_TOO_HARD;
 
-	if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa)))
+	if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa)))
 		return H_HARDWARE;
 
 	if (mm_iommu_mapped_inc(mem))
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index c32e9bfe75b1..648cf6c01348 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -262,7 +262,8 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
 	if (!mem)
 		return H_TOO_HARD;
 
-	if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa)))
+	if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift,
+			&hpa)))
 		return H_HARDWARE;
 
 	pua = (void *) vmalloc_to_phys(pua);
@@ -431,7 +432,8 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 		mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
 		if (mem)
-			prereg = mm_iommu_ua_to_hpa_rm(mem, ua, &tces) == 0;
+			prereg = mm_iommu_ua_to_hpa_rm(mem, ua,
+					IOMMU_PAGE_SHIFT_4K, &tces) == 0;
 	}
 
 	if (!prereg) {
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index e0a2d8e806ed..816055927ee4 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -19,6 +19,7 @@
 #include <linux/hugetlb.h>
 #include <linux/swap.h>
 #include <asm/mmu_context.h>
+#include <asm/pte-walk.h>
 
 static DEFINE_MUTEX(mem_list_mutex);
 
@@ -27,6 +28,7 @@ struct mm_iommu_table_group_mem_t {
 	struct rcu_head rcu;
 	unsigned long used;
 	atomic64_t mapped;
+	unsigned int pageshift;
 	u64 ua;			/* userspace address */
 	u64 entries;		/* number of entries in hpas[] */
 	u64 *hpas;		/* vmalloc'ed */
@@ -126,6 +128,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
 {
 	struct mm_iommu_table_group_mem_t *mem;
 	long i, j, ret = 0, locked_entries = 0;
+	unsigned int pageshift;
+	unsigned long flags;
 	struct page *page = NULL;
 
 	mutex_lock(&mem_list_mutex);
@@ -160,6 +164,12 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
 		goto unlock_exit;
 	}
 
+	/*
+	 * For a starting point for a maximum page size calculation
+	 * we use @ua and @entries natural alignment to allow IOMMU pages
+	 * smaller than huge pages but still bigger than PAGE_SIZE.
+	 */
+	mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
 	mem->hpas = vzalloc(entries * sizeof(mem->hpas[0]));
 	if (!mem->hpas) {
 		kfree(mem);
@@ -200,6 +210,23 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
 		}
 	}
 
 populate:
+		pageshift = PAGE_SHIFT;
+		if (PageCompound(page)) {
+			pte_t *pte;
+			struct page *head = compound_head(page);
+			unsigned int compshift = compound_order(head);
+
+			local_irq_save(flags); /* disables as well */
+			pte = find_linux_pte(mm->pgd, ua, NULL, &pageshift);
+			local_irq_restore(flags);
+
+			/* Double check it is still the same pinned page */
+			if (pte && pte_page(*pte) == head &&
+			    pageshift == compshift)
+				pageshift = max_t(unsigned int, pageshift,
+						PAGE_SHIFT);
+		}
+		mem->pageshift = min(mem->pageshift, pageshift);
 		mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
 	}
 
@@ -350,7 +377,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
 EXPORT_SYMBOL_GPL(mm_iommu_find);
 
 long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
-		unsigned long ua, unsigned long *hpa)
+		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
 {
 	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
 	u64 *va = &mem->hpas[entry];
@@ -358,6 +385,9 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
 	if (entry >= mem->entries)
 		return -EFAULT;
 
+	if (pageshift > mem->pageshift)
+		return -EFAULT;
+
 	*hpa = *va | (ua & ~PAGE_MASK);
 
 	return 0;
@@ -365,7 +395,7 @@ EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
 
 long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
-		unsigned long ua, unsigned long *hpa)
+		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
 {
 	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
 	void *va = &mem->hpas[entry];
@@ -374,6 +404,9 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
 	if (entry >= mem->entries)
 		return -EFAULT;
 
+	if (pageshift > mem->pageshift)
+		return -EFAULT;
+
 	pa = (void *) vmalloc_to_phys(va);
 	if (!pa)
 		return -EFAULT;
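The mm_iommu changes above thread a `pageshift` argument through the UA-to-HPA lookups so a request is refused when the IOMMU page (2^pageshift bytes) would be larger than the pinned backing page. A standalone sketch of that containment test (hypothetical helper, not the kernel API):

#include <assert.h>
#include <stdbool.h>

/* An IOMMU page of 2^iommu_shift bytes fits inside a backing page of
 * 2^mem_shift bytes only if iommu_shift <= mem_shift. */
static bool iommu_page_contained(unsigned int iommu_shift,
				 unsigned int mem_shift)
{
	return iommu_shift <= mem_shift;
}

int main(void)
{
	assert(iommu_page_contained(12, 16));	/* 4K IOMMU page, 64K pin */
	assert(!iommu_page_contained(24, 16));	/* 16M IOMMU page, 64K pin */
	return 0;
}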
diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S
index e1a5fbeae08d..5d7554c025fd 100644
--- a/arch/x86/xen/xen-pvh.S
+++ b/arch/x86/xen/xen-pvh.S
@@ -54,6 +54,9 @@
  * charge of setting up it's own stack, GDT and IDT.
  */
 
+#define PVH_GDT_ENTRY_CANARY	4
+#define PVH_CANARY_SEL		(PVH_GDT_ENTRY_CANARY * 8)
+
 ENTRY(pvh_start_xen)
 	cld
 
@@ -98,6 +101,12 @@ ENTRY(pvh_start_xen)
 	/* 64-bit entry point. */
 	.code64
 1:
+	/* Set base address in stack canary descriptor. */
+	mov $MSR_GS_BASE,%ecx
+	mov $_pa(canary), %eax
+	xor %edx, %edx
+	wrmsr
+
 	call xen_prepare_pvh
 
 	/* startup_64 expects boot_params in %rsi. */
@@ -107,6 +116,17 @@ ENTRY(pvh_start_xen)
 
 #else /* CONFIG_X86_64 */
 
+	/* Set base address in stack canary descriptor. */
+	movl $_pa(gdt_start),%eax
+	movl $_pa(canary),%ecx
+	movw %cx, (PVH_GDT_ENTRY_CANARY * 8) + 2(%eax)
+	shrl $16, %ecx
+	movb %cl, (PVH_GDT_ENTRY_CANARY * 8) + 4(%eax)
+	movb %ch, (PVH_GDT_ENTRY_CANARY * 8) + 7(%eax)
+
+	mov $PVH_CANARY_SEL,%eax
+	mov %eax,%gs
+
 	call mk_early_pgtbl_32
 
 	mov $_pa(initial_page_table), %eax
@@ -150,9 +170,13 @@ gdt_start:
 	.quad GDT_ENTRY(0xc09a, 0, 0xfffff) /* __KERNEL_CS */
 #endif
 	.quad GDT_ENTRY(0xc092, 0, 0xfffff) /* __KERNEL_DS */
+	.quad GDT_ENTRY(0x4090, 0, 0x18)    /* PVH_CANARY_SEL */
 gdt_end:
 
-	.balign 4
+	.balign 16
+canary:
+	.fill 48, 1, 0
+
 early_stack:
 	.fill 256, 1, 0
 early_stack_end:
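The 32-bit PVH path above scatters the canary's address into the GDT descriptor's split base fields (bits 15:0 in bytes 2-3, 23:16 in byte 4, 31:24 in byte 7) with movw/movb. A plain-C rendering of the same encoding, for clarity only:

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Write a 32-bit base into the split base fields of an 8-byte
 * segment descriptor, mirroring the movw/movb sequence above. */
static void gdt_set_base(uint8_t desc[8], uint32_t base)
{
	desc[2] = base & 0xff;
	desc[3] = (base >> 8) & 0xff;
	desc[4] = (base >> 16) & 0xff;
	desc[7] = (base >> 24) & 0xff;
}

int main(void)
{
	uint8_t d[8];

	memset(d, 0, sizeof(d));
	gdt_set_base(d, 0xdeadbeef);
	assert(d[2] == 0xef && d[3] == 0xbe && d[4] == 0xad && d[7] == 0xde);
	return 0;
}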
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index ad44b40fe284..55fc31f6fe7f 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -401,14 +401,6 @@ re_probe:
 			goto probe_failed;
 	}
 
-	/*
-	 * Ensure devices are listed in devices_kset in correct order
-	 * It's important to move Dev to the end of devices_kset before
-	 * calling .probe, because it could be recursive and parent Dev
-	 * should always go first
-	 */
-	devices_kset_move_last(dev);
-
 	if (dev->bus->probe) {
 		ret = dev->bus->probe(dev);
 		if (ret)
diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c
index 5b9d549aa791..e7926da59214 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c
@@ -55,6 +55,9 @@ nv04_display_create(struct drm_device *dev)
 	nouveau_display(dev)->init = nv04_display_init;
 	nouveau_display(dev)->fini = nv04_display_fini;
 
+	/* Pre-nv50 doesn't support atomic, so don't expose the ioctls */
+	dev->driver->driver_features &= ~DRIVER_ATOMIC;
+
 	nouveau_hw_save_vga_fonts(dev, 1);
 
 	nv04_crtc_create(dev, 0);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 595630d1fb9e..362a34cb435d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -79,6 +79,10 @@ MODULE_PARM_DESC(modeset, "enable driver (default: auto, "
 int nouveau_modeset = -1;
 module_param_named(modeset, nouveau_modeset, int, 0400);
 
+MODULE_PARM_DESC(atomic, "Expose atomic ioctl (default: disabled)");
+static int nouveau_atomic = 0;
+module_param_named(atomic, nouveau_atomic, int, 0400);
+
 MODULE_PARM_DESC(runpm, "disable (0), force enable (1), optimus only default (-1)");
 static int nouveau_runtime_pm = -1;
 module_param_named(runpm, nouveau_runtime_pm, int, 0400);
@@ -383,6 +387,9 @@ static int nouveau_drm_probe(struct pci_dev *pdev,
 
 	pci_set_master(pdev);
 
+	if (nouveau_atomic)
+		driver_pci.driver_features |= DRIVER_ATOMIC;
+
 	ret = drm_get_pci_dev(pdev, pent, &driver_pci);
 	if (ret) {
 		nvkm_device_del(&device);
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index a29474528e85..926ec51ba5be 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -4150,7 +4150,7 @@ nv50_disp_atomic_commit(struct drm_device *dev,
 	nv50_disp_atomic_commit_tail(state);
 
 	drm_for_each_crtc(crtc, dev) {
-		if (crtc->state->enable) {
+		if (crtc->state->active) {
 			if (!drm->have_disp_power_ref) {
 				drm->have_disp_power_ref = true;
 				return 0;
@@ -4398,10 +4398,6 @@ nv50_display_destroy(struct drm_device *dev)
 	kfree(disp);
 }
 
-MODULE_PARM_DESC(atomic, "Expose atomic ioctl (default: disabled)");
-static int nouveau_atomic = 0;
-module_param_named(atomic, nouveau_atomic, int, 0400);
-
 int
 nv50_display_create(struct drm_device *dev)
 {
@@ -4426,8 +4422,6 @@ nv50_display_create(struct drm_device *dev)
 	disp->disp = &nouveau_display(dev)->disp;
 	dev->mode_config.funcs = &nv50_disp_func;
 	dev->driver->driver_features |= DRIVER_PREFER_XBGR_30BPP;
-	if (nouveau_atomic)
-		dev->driver->driver_features |= DRIVER_ATOMIC;
 
 	/* small shared memory area we use for notifiers and semaphores */
 	ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM,
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 61084ba69a99..3d154eb63dcf 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -743,15 +743,20 @@ const struct bond_option *bond_opt_get(unsigned int option)
 static int bond_option_mode_set(struct bonding *bond,
 				const struct bond_opt_value *newval)
 {
-	if (!bond_mode_uses_arp(newval->value) && bond->params.arp_interval) {
-		netdev_dbg(bond->dev, "%s mode is incompatible with arp monitoring, start mii monitoring\n",
-			   newval->string);
-		/* disable arp monitoring */
-		bond->params.arp_interval = 0;
-		/* set miimon to default value */
-		bond->params.miimon = BOND_DEFAULT_MIIMON;
-		netdev_dbg(bond->dev, "Setting MII monitoring interval to %d\n",
-			   bond->params.miimon);
+	if (!bond_mode_uses_arp(newval->value)) {
+		if (bond->params.arp_interval) {
+			netdev_dbg(bond->dev, "%s mode is incompatible with arp monitoring, start mii monitoring\n",
+				   newval->string);
+			/* disable arp monitoring */
+			bond->params.arp_interval = 0;
+		}
+
+		if (!bond->params.miimon) {
+			/* set miimon to default value */
+			bond->params.miimon = BOND_DEFAULT_MIIMON;
+			netdev_dbg(bond->dev, "Setting MII monitoring interval to %d\n",
+				   bond->params.miimon);
+		}
 	}
 
 	if (newval->value == BOND_MODE_ALB)
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 5d4e61741476..ca3fa82316c2 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -1073,7 +1073,8 @@ static void m_can_chip_config(struct net_device *dev)
 
 	} else {
 	/* Version 3.1.x or 3.2.x */
-		cccr &= ~(CCCR_TEST | CCCR_MON | CCCR_BRSE | CCCR_FDOE);
+		cccr &= ~(CCCR_TEST | CCCR_MON | CCCR_BRSE | CCCR_FDOE |
+			  CCCR_NISO);
 
 		/* Only 3.2.x has NISO Bit implemented */
 		if (priv->can.ctrlmode & CAN_CTRLMODE_FD_NON_ISO)
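The next file introduces a PCIEFD_FW_VERSION() macro that packs major.minor.sub into a u32, placing the most significant field highest so packed values compare in version order. A standalone sketch of the same packing trick (assumes each field fits in 8 bits):

#include <assert.h>
#include <stdint.h>

/* Pack major.minor.sub so packed values compare in version order. */
#define FW_VERSION(x, y, z) \
	(((uint32_t)(x) << 24) | ((uint32_t)(y) << 16) | ((uint32_t)(z) << 8))

int main(void)
{
	assert(FW_VERSION(2, 9, 7) < FW_VERSION(3, 3, 0));
	assert(FW_VERSION(3, 3, 0) < FW_VERSION(3, 3, 1));
	return 0;
}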
diff --git a/drivers/net/can/peak_canfd/peak_pciefd_main.c b/drivers/net/can/peak_canfd/peak_pciefd_main.c
index 3c51a884db87..fa689854f16b 100644
--- a/drivers/net/can/peak_canfd/peak_pciefd_main.c
+++ b/drivers/net/can/peak_canfd/peak_pciefd_main.c
@@ -58,6 +58,10 @@ MODULE_LICENSE("GPL v2");
 #define PCIEFD_REG_SYS_VER1		0x0040	/* version reg #1 */
 #define PCIEFD_REG_SYS_VER2		0x0044	/* version reg #2 */
 
+#define PCIEFD_FW_VERSION(x, y, z)	(((u32)(x) << 24) | \
+					 ((u32)(y) << 16) | \
+					 ((u32)(z) << 8))
+
 /* System Control Registers Bits */
 #define PCIEFD_SYS_CTL_TS_RST		0x00000001	/* timestamp clock */
 #define PCIEFD_SYS_CTL_CLK_EN		0x00000002	/* system clock */
@@ -783,6 +787,21 @@ static int peak_pciefd_probe(struct pci_dev *pdev,
 		 "%ux CAN-FD PCAN-PCIe FPGA v%u.%u.%u:\n",
 		 can_count, hw_ver_major, hw_ver_minor, hw_ver_sub);
 
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	/* FW < v3.3.0 DMA logic doesn't handle correctly the mix of 32-bit and
+	 * 64-bit logical addresses: this workaround forces usage of 32-bit
+	 * DMA addresses only when such a fw is detected.
+	 */
+	if (PCIEFD_FW_VERSION(hw_ver_major, hw_ver_minor, hw_ver_sub) <
+	    PCIEFD_FW_VERSION(3, 3, 0)) {
+		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+		if (err)
+			dev_warn(&pdev->dev,
+				 "warning: can't set DMA mask %llxh (err %d)\n",
+				 DMA_BIT_MASK(32), err);
+	}
+#endif
+
 	/* stop system clock */
 	pciefd_sys_writereg(pciefd, PCIEFD_SYS_CTL_CLK_EN,
 			    PCIEFD_REG_SYS_CTL_CLR);
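The xilinx_can changes that follow track in-flight TX frames as `tx_head - tx_tail`, with both counters free-running unsigned integers. The difference stays correct across wraparound as long as the FIFO depth is far below UINT_MAX; a standalone sketch of that counter pattern (hypothetical struct, not the driver's):

#include <assert.h>

struct fifo {
	unsigned int head;	/* frames queued */
	unsigned int tail;	/* frames completed */
};

/* Number of frames currently in flight; unsigned subtraction makes
 * this correct even after head wraps past zero. */
static unsigned int fifo_level(const struct fifo *f)
{
	return f->head - f->tail;
}

int main(void)
{
	struct fifo f = { .head = 4294967295u, .tail = 4294967293u };

	assert(fifo_level(&f) == 2);
	f.head++;			/* wraps to 0 */
	assert(fifo_level(&f) == 3);
	return 0;
}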
diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index 89aec07c225f..5a24039733ef 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -2,6 +2,7 @@
  *
  * Copyright (C) 2012 - 2014 Xilinx, Inc.
  * Copyright (C) 2009 PetaLogix. All rights reserved.
+ * Copyright (C) 2017 Sandvik Mining and Construction Oy
  *
  * Description:
  * This driver is developed for Axi CAN IP and for Zynq CANPS Controller.
@@ -25,8 +26,10 @@
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/skbuff.h>
+#include <linux/spinlock.h>
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/can/dev.h>
@@ -101,7 +104,7 @@ enum xcan_reg {
 #define XCAN_INTR_ALL		(XCAN_IXR_TXOK_MASK | XCAN_IXR_BSOFF_MASK |\
 				 XCAN_IXR_WKUP_MASK | XCAN_IXR_SLP_MASK | \
 				 XCAN_IXR_RXNEMP_MASK | XCAN_IXR_ERROR_MASK | \
-				 XCAN_IXR_ARBLST_MASK | XCAN_IXR_RXOK_MASK)
+				 XCAN_IXR_RXOFLW_MASK | XCAN_IXR_ARBLST_MASK)
 
 /* CAN register bit shift - XCAN_<REG>_<BIT>_SHIFT */
 #define XCAN_BTR_SJW_SHIFT		7  /* Synchronous jump width */
@@ -118,6 +121,7 @@ enum xcan_reg {
 /**
  * struct xcan_priv - This definition define CAN driver instance
  * @can:			CAN private data structure.
+ * @tx_lock:			Lock for synchronizing TX interrupt handling
 * @tx_head:			Tx CAN packets ready to send on the queue
 * @tx_tail:			Tx CAN packets successfully sended on the queue
 * @tx_max:			Maximum number packets the driver can send
@@ -132,6 +136,7 @@ enum xcan_reg {
 */
struct xcan_priv {
 	struct can_priv can;
+	spinlock_t tx_lock;
 	unsigned int tx_head;
 	unsigned int tx_tail;
 	unsigned int tx_max;
@@ -159,6 +164,11 @@ static const struct can_bittiming_const xcan_bittiming_const = {
 	.brp_inc = 1,
 };
 
+#define XCAN_CAP_WATERMARK	0x0001
+struct xcan_devtype_data {
+	unsigned int caps;
+};
+
 /**
 * xcan_write_reg_le - Write a value to the device register little endian
 * @priv:	Driver private data structure
@@ -238,6 +248,10 @@ static int set_reset_mode(struct net_device *ndev)
 		usleep_range(500, 10000);
 	}
 
+	/* reset clears FIFOs */
+	priv->tx_head = 0;
+	priv->tx_tail = 0;
+
 	return 0;
 }
 
@@ -392,6 +406,7 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	struct net_device_stats *stats = &ndev->stats;
 	struct can_frame *cf = (struct can_frame *)skb->data;
 	u32 id, dlc, data[2] = {0, 0};
+	unsigned long flags;
 
 	if (can_dropped_invalid_skb(ndev, skb))
 		return NETDEV_TX_OK;
@@ -439,6 +454,9 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		data[1] = be32_to_cpup((__be32 *)(cf->data + 4));
 
 	can_put_echo_skb(skb, ndev, priv->tx_head % priv->tx_max);
+
+	spin_lock_irqsave(&priv->tx_lock, flags);
+
 	priv->tx_head++;
 
 	/* Write the Frame to Xilinx CAN TX FIFO */
@@ -454,10 +472,16 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		stats->tx_bytes += cf->can_dlc;
 	}
 
+	/* Clear TX-FIFO-empty interrupt for xcan_tx_interrupt() */
+	if (priv->tx_max > 1)
+		priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXFEMP_MASK);
+
 	/* Check if the TX buffer is full */
 	if ((priv->tx_head - priv->tx_tail) == priv->tx_max)
 		netif_stop_queue(ndev);
 
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+
 	return NETDEV_TX_OK;
 }
 
@@ -529,6 +553,123 @@ static int xcan_rx(struct net_device *ndev)
 	return 1;
 }
 
+/**
+ * xcan_current_error_state - Get current error state from HW
+ * @ndev:	Pointer to net_device structure
+ *
+ * Checks the current CAN error state from the HW. Note that this
+ * only checks for ERROR_PASSIVE and ERROR_WARNING.
+ *
+ * Return:
+ * ERROR_PASSIVE or ERROR_WARNING if either is active, ERROR_ACTIVE
+ * otherwise.
+ */
+static enum can_state xcan_current_error_state(struct net_device *ndev)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	u32 status = priv->read_reg(priv, XCAN_SR_OFFSET);
+
+	if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK)
+		return CAN_STATE_ERROR_PASSIVE;
+	else if (status & XCAN_SR_ERRWRN_MASK)
+		return CAN_STATE_ERROR_WARNING;
+	else
+		return CAN_STATE_ERROR_ACTIVE;
+}
+
+/**
+ * xcan_set_error_state - Set new CAN error state
+ * @ndev:	Pointer to net_device structure
+ * @new_state:	The new CAN state to be set
+ * @cf:		Error frame to be populated or NULL
+ *
+ * Set new CAN error state for the device, updating statistics and
+ * populating the error frame if given.
+ */
+static void xcan_set_error_state(struct net_device *ndev,
+				 enum can_state new_state,
+				 struct can_frame *cf)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	u32 ecr = priv->read_reg(priv, XCAN_ECR_OFFSET);
+	u32 txerr = ecr & XCAN_ECR_TEC_MASK;
+	u32 rxerr = (ecr & XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT;
+
+	priv->can.state = new_state;
+
+	if (cf) {
+		cf->can_id |= CAN_ERR_CRTL;
+		cf->data[6] = txerr;
+		cf->data[7] = rxerr;
+	}
+
+	switch (new_state) {
+	case CAN_STATE_ERROR_PASSIVE:
+		priv->can.can_stats.error_passive++;
+		if (cf)
+			cf->data[1] = (rxerr > 127) ?
+					CAN_ERR_CRTL_RX_PASSIVE :
+					CAN_ERR_CRTL_TX_PASSIVE;
+		break;
+	case CAN_STATE_ERROR_WARNING:
+		priv->can.can_stats.error_warning++;
+		if (cf)
+			cf->data[1] |= (txerr > rxerr) ?
+					CAN_ERR_CRTL_TX_WARNING :
+					CAN_ERR_CRTL_RX_WARNING;
+		break;
+	case CAN_STATE_ERROR_ACTIVE:
+		if (cf)
+			cf->data[1] |= CAN_ERR_CRTL_ACTIVE;
+		break;
+	default:
+		/* non-ERROR states are handled elsewhere */
+		WARN_ON(1);
+		break;
+	}
+}
+
+/**
+ * xcan_update_error_state_after_rxtx - Update CAN error state after RX/TX
+ * @ndev:	Pointer to net_device structure
+ *
+ * If the device is in a ERROR-WARNING or ERROR-PASSIVE state, check if
+ * the performed RX/TX has caused it to drop to a lesser state and set
+ * the interface state accordingly.
+ */
+static void xcan_update_error_state_after_rxtx(struct net_device *ndev)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	enum can_state old_state = priv->can.state;
+	enum can_state new_state;
+
+	/* changing error state due to successful frame RX/TX can only
+	 * occur from these states
+	 */
+	if (old_state != CAN_STATE_ERROR_WARNING &&
+	    old_state != CAN_STATE_ERROR_PASSIVE)
+		return;
+
+	new_state = xcan_current_error_state(ndev);
+
+	if (new_state != old_state) {
+		struct sk_buff *skb;
+		struct can_frame *cf;
+
+		skb = alloc_can_err_skb(ndev, &cf);
+
+		xcan_set_error_state(ndev, new_state, skb ? cf : NULL);
+
+		if (skb) {
+			struct net_device_stats *stats = &ndev->stats;
+
+			stats->rx_packets++;
+			stats->rx_bytes += cf->can_dlc;
+			netif_rx(skb);
+		}
+	}
+}
+
 /**
 * xcan_err_interrupt - error frame Isr
 * @ndev:	net_device pointer
@@ -544,16 +685,12 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr)
 	struct net_device_stats *stats = &ndev->stats;
 	struct can_frame *cf;
 	struct sk_buff *skb;
-	u32 err_status, status, txerr = 0, rxerr = 0;
+	u32 err_status;
 
 	skb = alloc_can_err_skb(ndev, &cf);
 
 	err_status = priv->read_reg(priv, XCAN_ESR_OFFSET);
 	priv->write_reg(priv, XCAN_ESR_OFFSET, err_status);
-	txerr = priv->read_reg(priv, XCAN_ECR_OFFSET) & XCAN_ECR_TEC_MASK;
-	rxerr = ((priv->read_reg(priv, XCAN_ECR_OFFSET) &
-		  XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT);
-	status = priv->read_reg(priv, XCAN_SR_OFFSET);
 
 	if (isr & XCAN_IXR_BSOFF_MASK) {
 		priv->can.state = CAN_STATE_BUS_OFF;
@@ -563,28 +700,10 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr)
 		can_bus_off(ndev);
 		if (skb)
 			cf->can_id |= CAN_ERR_BUSOFF;
-	} else if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK) {
-		priv->can.state = CAN_STATE_ERROR_PASSIVE;
-		priv->can.can_stats.error_passive++;
-		if (skb) {
-			cf->can_id |= CAN_ERR_CRTL;
-			cf->data[1] = (rxerr > 127) ?
-					CAN_ERR_CRTL_RX_PASSIVE :
-					CAN_ERR_CRTL_TX_PASSIVE;
-			cf->data[6] = txerr;
-			cf->data[7] = rxerr;
-		}
-	} else if (status & XCAN_SR_ERRWRN_MASK) {
-		priv->can.state = CAN_STATE_ERROR_WARNING;
-		priv->can.can_stats.error_warning++;
-		if (skb) {
-			cf->can_id |= CAN_ERR_CRTL;
-			cf->data[1] |= (txerr > rxerr) ?
-					CAN_ERR_CRTL_TX_WARNING :
-					CAN_ERR_CRTL_RX_WARNING;
-			cf->data[6] = txerr;
-			cf->data[7] = rxerr;
-		}
+	} else {
+		enum can_state new_state = xcan_current_error_state(ndev);
+
+		xcan_set_error_state(ndev, new_state, skb ? cf : NULL);
 	}
 
 	/* Check for Arbitration lost interrupt */
@@ -600,7 +719,6 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr)
 	if (isr & XCAN_IXR_RXOFLW_MASK) {
 		stats->rx_over_errors++;
 		stats->rx_errors++;
-		priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK);
 		if (skb) {
 			cf->can_id |= CAN_ERR_CRTL;
 			cf->data[1] |= CAN_ERR_CRTL_RX_OVERFLOW;
@@ -709,26 +827,20 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota)
 
 	isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
 	while ((isr & XCAN_IXR_RXNEMP_MASK) && (work_done < quota)) {
-		if (isr & XCAN_IXR_RXOK_MASK) {
-			priv->write_reg(priv, XCAN_ICR_OFFSET,
-				XCAN_IXR_RXOK_MASK);
-			work_done += xcan_rx(ndev);
-		} else {
-			priv->write_reg(priv, XCAN_ICR_OFFSET,
-				XCAN_IXR_RXNEMP_MASK);
-			break;
-		}
+		work_done += xcan_rx(ndev);
 		priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_RXNEMP_MASK);
 		isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
 	}
 
-	if (work_done)
+	if (work_done) {
 		can_led_event(ndev, CAN_LED_EVENT_RX);
+		xcan_update_error_state_after_rxtx(ndev);
+	}
 
 	if (work_done < quota) {
 		napi_complete_done(napi, work_done);
 		ier = priv->read_reg(priv, XCAN_IER_OFFSET);
-		ier |= (XCAN_IXR_RXOK_MASK | XCAN_IXR_RXNEMP_MASK);
+		ier |= XCAN_IXR_RXNEMP_MASK;
 		priv->write_reg(priv, XCAN_IER_OFFSET, ier);
 	}
 	return work_done;
@@ -743,18 +855,71 @@ static void xcan_tx_interrupt(struct net_device *ndev, u32 isr)
 {
 	struct xcan_priv *priv = netdev_priv(ndev);
 	struct net_device_stats *stats = &ndev->stats;
+	unsigned int frames_in_fifo;
+	int frames_sent = 1; /* TXOK => at least 1 frame was sent */
+	unsigned long flags;
+	int retries = 0;
+
+	/* Synchronize with xmit as we need to know the exact number
+	 * of frames in the FIFO to stay in sync due to the TXFEMP
+	 * handling.
+	 * This also prevents a race between netif_wake_queue() and
+	 * netif_stop_queue().
+	 */
+	spin_lock_irqsave(&priv->tx_lock, flags);
+
+	frames_in_fifo = priv->tx_head - priv->tx_tail;
+
+	if (WARN_ON_ONCE(frames_in_fifo == 0)) {
+		/* clear TXOK anyway to avoid getting back here */
+		priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK);
+		spin_unlock_irqrestore(&priv->tx_lock, flags);
+		return;
+	}
+
+	/* Check if 2 frames were sent (TXOK only means that at least 1
+	 * frame was sent).
+	 */
+	if (frames_in_fifo > 1) {
+		WARN_ON(frames_in_fifo > priv->tx_max);
+
+		/* Synchronize TXOK and isr so that after the loop:
+		 * (1) isr variable is up-to-date at least up to TXOK clear
+		 *     time. This avoids us clearing a TXOK of a second frame
+		 *     but not noticing that the FIFO is now empty and thus
+		 *     marking only a single frame as sent.
+		 * (2) No TXOK is left. Having one could mean leaving a
+		 *     stray TXOK as we might process the associated frame
+		 *     via TXFEMP handling as we read TXFEMP *after* TXOK
+		 *     clear to satisfy (1).
+		 */
+		while ((isr & XCAN_IXR_TXOK_MASK) && !WARN_ON(++retries == 100)) {
+			priv->write_reg(priv, XCAN_ICR_OFFSET,
+					XCAN_IXR_TXOK_MASK);
+			isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
+		}
 
-	while ((priv->tx_head - priv->tx_tail > 0) &&
-			(isr & XCAN_IXR_TXOK_MASK)) {
+		if (isr & XCAN_IXR_TXFEMP_MASK) {
+			/* nothing in FIFO anymore */
+			frames_sent = frames_in_fifo;
+		}
+	} else {
+		/* single frame in fifo, just clear TXOK */
 		priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK);
+	}
+
+	while (frames_sent--) {
 		can_get_echo_skb(ndev, priv->tx_tail % priv->tx_max);
 		priv->tx_tail++;
 		stats->tx_packets++;
-		isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
 	}
 
-	can_led_event(ndev, CAN_LED_EVENT_TX);
 	netif_wake_queue(ndev);
+
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+	can_led_event(ndev, CAN_LED_EVENT_TX);
+	xcan_update_error_state_after_rxtx(ndev);
 }
 
 /**
@@ -773,6 +938,7 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id)
 	struct net_device *ndev = (struct net_device *)dev_id;
 	struct xcan_priv *priv = netdev_priv(ndev);
 	u32 isr, ier;
+	u32 isr_errors;
 
 	/* Get the interrupt status from Xilinx CAN */
 	isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
@@ -791,18 +957,17 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id)
 		xcan_tx_interrupt(ndev, isr);
 
 	/* Check for the type of error interrupt and Processing it */
-	if (isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK |
-			XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK)) {
-		priv->write_reg(priv, XCAN_ICR_OFFSET, (XCAN_IXR_ERROR_MASK |
-				XCAN_IXR_RXOFLW_MASK | XCAN_IXR_BSOFF_MASK |
-				XCAN_IXR_ARBLST_MASK));
+	isr_errors = isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK |
+			    XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK);
+	if (isr_errors) {
+		priv->write_reg(priv, XCAN_ICR_OFFSET, isr_errors);
 		xcan_err_interrupt(ndev, isr);
 	}
 
 	/* Check for the type of receive interrupt and Processing it */
-	if (isr & (XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK)) {
+	if (isr & XCAN_IXR_RXNEMP_MASK) {
 		ier = priv->read_reg(priv, XCAN_IER_OFFSET);
-		ier &= ~(XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK);
+		ier &= ~XCAN_IXR_RXNEMP_MASK;
 		priv->write_reg(priv, XCAN_IER_OFFSET, ier);
 		napi_schedule(&priv->napi);
 	}
@@ -819,13 +984,9 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id)
 static void xcan_chip_stop(struct net_device *ndev)
 {
 	struct xcan_priv *priv = netdev_priv(ndev);
-	u32 ier;
 
 	/* Disable interrupts and leave the can in configuration mode */
-	ier = priv->read_reg(priv, XCAN_IER_OFFSET);
-	ier &= ~XCAN_INTR_ALL;
-	priv->write_reg(priv, XCAN_IER_OFFSET, ier);
-	priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK);
+	set_reset_mode(ndev);
 	priv->can.state = CAN_STATE_STOPPED;
 }
 
@@ -958,10 +1119,15 @@ static const struct net_device_ops xcan_netdev_ops = {
 */
static int __maybe_unused xcan_suspend(struct device *dev)
 {
-	if (!device_may_wakeup(dev))
-		return pm_runtime_force_suspend(dev);
+	struct net_device *ndev = dev_get_drvdata(dev);
 
-	return 0;
+	if (netif_running(ndev)) {
+		netif_stop_queue(ndev);
+		netif_device_detach(ndev);
+		xcan_chip_stop(ndev);
+	}
+
+	return pm_runtime_force_suspend(dev);
 }
 
 /**
@@ -973,11 +1139,27 @@ static int __maybe_unused xcan_suspend(struct device *dev)
 */
static int __maybe_unused xcan_resume(struct device *dev)
 {
-	if (!device_may_wakeup(dev))
-		return pm_runtime_force_resume(dev);
+	struct net_device *ndev = dev_get_drvdata(dev);
+	int ret;
 
-	return 0;
+	ret = pm_runtime_force_resume(dev);
+	if (ret) {
+		dev_err(dev, "pm_runtime_force_resume failed on resume\n");
+		return ret;
+	}
+
+	if (netif_running(ndev)) {
+		ret = xcan_chip_start(ndev);
+		if (ret) {
+			dev_err(dev, "xcan_chip_start failed on resume\n");
+			return ret;
+		}
+
+		netif_device_attach(ndev);
+		netif_start_queue(ndev);
+	}
+
+	return 0;
 }
 
 /**
@@ -992,14 +1174,6 @@ static int __maybe_unused xcan_runtime_suspend(struct device *dev)
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct xcan_priv *priv = netdev_priv(ndev);
 
-	if (netif_running(ndev)) {
-		netif_stop_queue(ndev);
-		netif_device_detach(ndev);
-	}
-
-	priv->write_reg(priv, XCAN_MSR_OFFSET, XCAN_MSR_SLEEP_MASK);
-	priv->can.state = CAN_STATE_SLEEPING;
-
 	clk_disable_unprepare(priv->bus_clk);
 	clk_disable_unprepare(priv->can_clk);
 
@@ -1018,7 +1192,6 @@ static int __maybe_unused xcan_runtime_resume(struct device *dev)
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct xcan_priv *priv = netdev_priv(ndev);
 	int ret;
-	u32 isr, status;
 
 	ret = clk_prepare_enable(priv->bus_clk);
 	if (ret) {
@@ -1032,27 +1205,6 @@ static int __maybe_unused xcan_runtime_resume(struct device *dev)
 		return ret;
 	}
 
-	priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK);
-	isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
-	status = priv->read_reg(priv, XCAN_SR_OFFSET);
-
-	if (netif_running(ndev)) {
-		if (isr & XCAN_IXR_BSOFF_MASK) {
-			priv->can.state = CAN_STATE_BUS_OFF;
-			priv->write_reg(priv, XCAN_SRR_OFFSET,
-					XCAN_SRR_RESET_MASK);
-		} else if ((status & XCAN_SR_ESTAT_MASK) ==
-					XCAN_SR_ESTAT_MASK) {
-			priv->can.state = CAN_STATE_ERROR_PASSIVE;
-		} else if (status & XCAN_SR_ERRWRN_MASK) {
-			priv->can.state = CAN_STATE_ERROR_WARNING;
-		} else {
-			priv->can.state = CAN_STATE_ERROR_ACTIVE;
-		}
-		netif_device_attach(ndev);
-		netif_start_queue(ndev);
-	}
-
 	return 0;
 }
 
@@ -1061,6 +1213,18 @@ static const struct dev_pm_ops xcan_dev_pm_ops = {
 	SET_RUNTIME_PM_OPS(xcan_runtime_suspend, xcan_runtime_resume, NULL)
 };
 
+static const struct xcan_devtype_data xcan_zynq_data = {
+	.caps = XCAN_CAP_WATERMARK,
+};
+
+/* Match table for OF platform binding */
+static const struct of_device_id xcan_of_match[] = {
+	{ .compatible = "xlnx,zynq-can-1.0", .data = &xcan_zynq_data },
+	{ .compatible = "xlnx,axi-can-1.00.a", },
+	{ /* end of list */ },
+};
+MODULE_DEVICE_TABLE(of, xcan_of_match);
+
 /**
 * xcan_probe - Platform registration call
 * @pdev:	Handle to the platform device structure
@@ -1075,8 +1239,10 @@ static int xcan_probe(struct platform_device *pdev)
 	struct resource *res; /* IO mem resources */
 	struct net_device *ndev;
 	struct xcan_priv *priv;
+	const struct of_device_id *of_id;
+	int caps = 0;
 	void __iomem *addr;
-	int ret, rx_max, tx_max;
+	int ret, rx_max, tx_max, tx_fifo_depth;
 
 	/* Get the virtual base address for the device */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -1086,7 +1252,8 @@ static int xcan_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth", &tx_max);
+	ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth",
+				   &tx_fifo_depth);
 	if (ret < 0)
 		goto err;
 
@@ -1094,6 +1261,30 @@ static int xcan_probe(struct platform_device *pdev)
 	if (ret < 0)
 		goto err;
 
+	of_id = of_match_device(xcan_of_match, &pdev->dev);
+	if (of_id) {
+		const struct xcan_devtype_data *devtype_data = of_id->data;
+
+		if (devtype_data)
+			caps = devtype_data->caps;
+	}
+
+	/* There is no way to directly figure out how many frames have been
+	 * sent when the TXOK interrupt is processed. If watermark programming
+	 * is supported, we can have 2 frames in the FIFO and use TXFEMP
+	 * to determine if 1 or 2 frames have been sent.
+	 * Theoretically we should be able to use TXFWMEMP to determine up
+	 * to 3 frames, but it seems that after putting a second frame in the
+	 * FIFO, with watermark at 2 frames, it can happen that TXFWMEMP (less
+	 * than 2 frames in FIFO) is set anyway with no TXOK (a frame was
+	 * sent), which is not a sensible state - possibly TXFWMEMP is not
+	 * completely synchronized with the rest of the bits?
+	 */
+	if (caps & XCAN_CAP_WATERMARK)
+		tx_max = min(tx_fifo_depth, 2);
+	else
+		tx_max = 1;
+
 	/* Create a CAN device instance */
 	ndev = alloc_candev(sizeof(struct xcan_priv), tx_max);
 	if (!ndev)
@@ -1108,6 +1299,7 @@ static int xcan_probe(struct platform_device *pdev)
 			CAN_CTRLMODE_BERR_REPORTING;
 	priv->reg_base = addr;
 	priv->tx_max = tx_max;
+	spin_lock_init(&priv->tx_lock);
 
 	/* Get IRQ for the device */
 	ndev->irq = platform_get_irq(pdev, 0);
@@ -1172,9 +1364,9 @@ static int xcan_probe(struct platform_device *pdev)
 
 	pm_runtime_put(&pdev->dev);
 
-	netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth:%d\n",
+	netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth: actual %d, using %d\n",
 			priv->reg_base, ndev->irq, priv->can.clock.freq,
-			priv->tx_max);
+			tx_fifo_depth, priv->tx_max);
 
 	return 0;
 
@@ -1208,14 +1400,6 @@ static int xcan_remove(struct platform_device *pdev)
 	return 0;
 }
 
-/* Match table for OF platform binding */
-static const struct of_device_id xcan_of_match[] = {
-	{ .compatible = "xlnx,zynq-can-1.0", },
-	{ .compatible = "xlnx,axi-can-1.00.a", },
-	{ /* end of list */ },
-};
-MODULE_DEVICE_TABLE(of, xcan_of_match);
-
 static struct platform_driver xcan_driver = {
 	.probe = xcan_probe,
 	.remove	= xcan_remove,
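The xilinx_can changes above map the controller's status register onto the CAN error-state ladder in xcan_current_error_state(): error-passive takes priority over error-warning, and everything else is error-active. A minimal standalone rendering of that priority check; the mask values here are illustrative, not the hardware ones:

#include <assert.h>

enum state { ACTIVE, WARNING, PASSIVE };

#define SR_ERRWRN	0x01
#define SR_ESTAT	0x06	/* both bits set means error passive */

static enum state error_state(unsigned int status)
{
	if ((status & SR_ESTAT) == SR_ESTAT)
		return PASSIVE;
	if (status & SR_ERRWRN)
		return WARNING;
	return ACTIVE;
}

int main(void)
{
	assert(error_state(SR_ESTAT | SR_ERRWRN) == PASSIVE);
	assert(error_state(SR_ERRWRN) == WARNING);
	assert(error_state(0) == ACTIVE);
	return 0;
}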
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index a069fcc823c3..b26da0952a4d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -2957,7 +2957,7 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
 	u32 srqn = qp_get_srqn(qpc) & 0xffffff;
 	int use_srq = (qp_get_srqn(qpc) >> 24) & 1;
 	struct res_srq *srq;
-	int local_qpn = be32_to_cpu(qpc->local_qpn) & 0xffffff;
+	int local_qpn = vhcr->in_modifier & 0xffffff;
 
 	err = adjust_qp_sched_queue(dev, slave, qpc, inbox);
 	if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index 12d3ced61114..e87923e046c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -381,14 +381,14 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv)
 	HLIST_HEAD(del_list);
 	spin_lock_bh(&priv->fs.arfs.arfs_lock);
 	mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
-		if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA)
-			break;
 		if (!work_pending(&arfs_rule->arfs_work) &&
 		    rps_may_expire_flow(priv->netdev,
 					arfs_rule->rxq, arfs_rule->flow_id,
 					arfs_rule->filter_id)) {
 			hlist_del_init(&arfs_rule->hlist);
 			hlist_add_head(&arfs_rule->hlist, &del_list);
+			if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA)
+				break;
 		}
 	}
 	spin_unlock_bh(&priv->fs.arfs.arfs_lock);
@@ -711,6 +711,9 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
 	    skb->protocol != htons(ETH_P_IPV6))
 		return -EPROTONOSUPPORT;
 
+	if (skb->encapsulation)
+		return -EPROTONOSUPPORT;
+
 	arfs_t = arfs_get_table(arfs, arfs_get_ip_proto(skb), skb->protocol);
 	if (!arfs_t)
 		return -EPROTONOSUPPORT;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
index 84dd63e74041..27040009d87a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
@@ -545,6 +545,7 @@ void mlx5e_pps_event_handler(struct mlx5e_priv *priv,
 void mlx5e_timestamp_init(struct mlx5e_priv *priv)
 {
 	struct mlx5e_tstamp *tstamp = &priv->tstamp;
+	u64 overflow_cycles;
 	u64 ns;
 	u64 frac = 0;
 	u32 dev_freq;
@@ -569,10 +570,17 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv)
 
 	/* Calculate period in seconds to call the overflow watchdog - to make
 	 * sure counter is checked at least once every wrap around.
+	 * The period is calculated as the minimum between max HW cycles count
+	 * (The clock source mask) and max amount of cycles that can be
+	 * multiplied by clock multiplier where the result doesn't exceed
+	 * 64bits.
 	 */
-	ns = cyclecounter_cyc2ns(&tstamp->cycles, tstamp->cycles.mask,
+	overflow_cycles = div64_u64(~0ULL >> 1, tstamp->cycles.mult);
+	overflow_cycles = min(overflow_cycles, tstamp->cycles.mask >> 1);
+
+	ns = cyclecounter_cyc2ns(&tstamp->cycles, overflow_cycles,
 				 frac, &frac);
-	do_div(ns, NSEC_PER_SEC / 2 / HZ);
+	do_div(ns, NSEC_PER_SEC / HZ);
 	tstamp->overflow_period = ns;
 
 	INIT_WORK(&tstamp->pps_info.out_work, mlx5e_pps_out);
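The en_clock hunk bounds the watchdog interval by the largest cycle count that neither wraps the hardware counter (mask >> 1) nor overflows the 64-bit cycles-times-mult product. A standalone sketch of that bound, assuming the usual cyclecounter mask/mult semantics:

#include <assert.h>
#include <stdint.h>

static uint64_t overflow_cycles(uint64_t mask, uint32_t mult)
{
	uint64_t by_mult = (~0ULL >> 1) / mult;	/* keep cycles * mult in 64 bits */
	uint64_t by_mask = mask >> 1;		/* stay well below counter wrap */

	return by_mult < by_mask ? by_mult : by_mask;
}

int main(void)
{
	/* 41-bit counter with a small multiplier: the mask term limits. */
	assert(overflow_cycles((1ULL << 41) - 1, 16) == (1ULL << 40) - 1);
	return 0;
}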
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index dba6d17ad885..47d2ef2fb9b3 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -511,7 +511,7 @@ static int phy_start_aneg_priv(struct phy_device *phydev, bool sync)
 	 * negotiation may already be done and aneg interrupt may not be
 	 * generated.
 	 */
-	if (phy_interrupt_is_valid(phydev) && (phydev->state == PHY_AN)) {
+	if (phydev->irq != PHY_POLL && phydev->state == PHY_AN) {
 		err = phy_aneg_done(phydev);
 		if (err > 0) {
 			trigger = true;
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index bbdb46916dc3..13d39a72fe0d 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -636,8 +636,61 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
 	return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
 }
 
-/* Add new entry to forwarding table -- assumes lock held */
+static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan,
+					 const u8 *mac, __u16 state,
+					 __be32 src_vni, __u8 ndm_flags)
+{
+	struct vxlan_fdb *f;
+
+	f = kmalloc(sizeof(*f), GFP_ATOMIC);
+	if (!f)
+		return NULL;
+	f->state = state;
+	f->flags = ndm_flags;
+	f->updated = f->used = jiffies;
+	f->vni = src_vni;
+	INIT_LIST_HEAD(&f->remotes);
+	memcpy(f->eth_addr, mac, ETH_ALEN);
+
+	return f;
+}
+
 static int vxlan_fdb_create(struct vxlan_dev *vxlan,
+			    const u8 *mac, union vxlan_addr *ip,
+			    __u16 state, __be16 port, __be32 src_vni,
+			    __be32 vni, __u32 ifindex, __u8 ndm_flags,
+			    struct vxlan_fdb **fdb)
+{
+	struct vxlan_rdst *rd = NULL;
+	struct vxlan_fdb *f;
+	int rc;
+
+	if (vxlan->cfg.addrmax &&
+	    vxlan->addrcnt >= vxlan->cfg.addrmax)
+		return -ENOSPC;
+
+	netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
+	f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags);
+	if (!f)
+		return -ENOMEM;
+
+	rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
+	if (rc < 0) {
+		kfree(f);
+		return rc;
+	}
+
+	++vxlan->addrcnt;
+	hlist_add_head_rcu(&f->hlist,
+			   vxlan_fdb_head(vxlan, mac, src_vni));
+
+	*fdb = f;
+
+	return 0;
+}
+
+/* Add new entry to forwarding table -- assumes lock held */
+static int vxlan_fdb_update(struct vxlan_dev *vxlan,
 			    const u8 *mac, union vxlan_addr *ip,
 			    __u16 state, __u16 flags,
 			    __be16 port, __be32 src_vni, __be32 vni,
@@ -687,37 +740,17 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
 		if (!(flags & NLM_F_CREATE))
 			return -ENOENT;
 
-		if (vxlan->cfg.addrmax &&
-		    vxlan->addrcnt >= vxlan->cfg.addrmax)
-			return -ENOSPC;
-
 		/* Disallow replace to add a multicast entry */
 		if ((flags & NLM_F_REPLACE) &&
 		    (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
 			return -EOPNOTSUPP;
 
 		netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
-		f = kmalloc(sizeof(*f), GFP_ATOMIC);
-		if (!f)
-			return -ENOMEM;
-
-		notify = 1;
-		f->state = state;
-		f->flags = ndm_flags;
-		f->updated = f->used = jiffies;
-		f->vni = src_vni;
-		INIT_LIST_HEAD(&f->remotes);
-		memcpy(f->eth_addr, mac, ETH_ALEN);
-
-		rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
-		if (rc < 0) {
-			kfree(f);
+		rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
+				      vni, ifindex, ndm_flags, &f);
+		if (rc < 0)
 			return rc;
-		}
-
-		++vxlan->addrcnt;
-		hlist_add_head_rcu(&f->hlist,
-				   vxlan_fdb_head(vxlan, mac, src_vni));
+		notify = 1;
 	}
 
 	if (notify) {
@@ -741,13 +774,15 @@ static void vxlan_fdb_free(struct rcu_head *head)
 	kfree(f);
 }
 
-static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
+static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
+			      bool do_notify)
 {
 	netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr);
 
 	--vxlan->addrcnt;
-	vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
+	if (do_notify)
+		vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
 
 	hlist_del_rcu(&f->hlist);
 	call_rcu(&f->rcu, vxlan_fdb_free);
@@ -863,7 +898,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 		return -EAFNOSUPPORT;
 
 	spin_lock_bh(&vxlan->hash_lock);
-	err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags,
+	err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
 			       port, src_vni, vni, ifindex, ndm->ndm_flags);
 	spin_unlock_bh(&vxlan->hash_lock);
 
@@ -897,7 +932,7 @@ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
 		goto out;
 	}
 
-	vxlan_fdb_destroy(vxlan, f);
+	vxlan_fdb_destroy(vxlan, f, true);
 
 out:
 	return 0;
@@ -1006,7 +1041,7 @@ static bool vxlan_snoop(struct net_device *dev,
 
 		/* close off race between vxlan_flush and incoming packets */
 		if (netif_running(dev))
-			vxlan_fdb_create(vxlan, src_mac, src_ip,
+			vxlan_fdb_update(vxlan, src_mac, src_ip,
 					 NUD_REACHABLE,
 					 NLM_F_EXCL|NLM_F_CREATE,
 					 vxlan->cfg.dst_port,
@@ -2360,7 +2395,7 @@ static void vxlan_cleanup(unsigned long arg)
 					 "garbage collect %pM\n",
 					 f->eth_addr);
 				f->state = NUD_STALE;
-				vxlan_fdb_destroy(vxlan, f);
+				vxlan_fdb_destroy(vxlan, f, true);
 			} else if (time_before(timeout, next_timer))
 				next_timer = timeout;
 		}
@@ -2411,7 +2446,7 @@ static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
 	spin_lock_bh(&vxlan->hash_lock);
 	f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
 	if (f)
-		vxlan_fdb_destroy(vxlan, f);
+		vxlan_fdb_destroy(vxlan, f, true);
 	spin_unlock_bh(&vxlan->hash_lock);
 }
 
@@ -2465,7 +2500,7 @@ static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all)
 				continue;
 			/* the all_zeros_mac entry is deleted at vxlan_uninit */
 			if (!is_zero_ether_addr(f->eth_addr))
-				vxlan_fdb_destroy(vxlan, f);
+				vxlan_fdb_destroy(vxlan, f, true);
 		}
 	}
 	spin_unlock_bh(&vxlan->hash_lock);
@@ -3157,6 +3192,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
 {
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_fdb *f = NULL;
 	int err;
 
 	err = vxlan_dev_configure(net, dev, conf, false, extack);
@@ -3170,24 +3206,35 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
 		err = vxlan_fdb_create(vxlan, all_zeros_mac,
 				       &vxlan->default_dst.remote_ip,
 				       NUD_REACHABLE | NUD_PERMANENT,
-				       NLM_F_EXCL | NLM_F_CREATE,
 				       vxlan->cfg.dst_port,
 				       vxlan->default_dst.remote_vni,
 				       vxlan->default_dst.remote_vni,
 				       vxlan->default_dst.remote_ifindex,
-				       NTF_SELF);
+				       NTF_SELF, &f);
 		if (err)
 			return err;
 	}
 
 	err = register_netdevice(dev);
+	if (err)
+		goto errout;
+
+	err = rtnl_configure_link(dev, NULL);
 	if (err) {
-		vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni);
-		return err;
+		unregister_netdevice(dev);
+		goto errout;
 	}
 
+	/* notify default fdb entry */
+	if (f)
+		vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
+
 	list_add(&vxlan->next, &vn->vxlan_list);
 	return 0;
+
+errout:
+	if (f)
+		vxlan_fdb_destroy(vxlan, f, false);
+	return err;
 }
 
 static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
@@ -3416,6 +3463,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
 	struct vxlan_rdst *dst = &vxlan->default_dst;
 	struct vxlan_rdst old_dst;
 	struct vxlan_config conf;
+	struct vxlan_fdb *f = NULL;
 	int err;
 
 	err = vxlan_nl2conf(tb, data,
@@ -3444,16 +3492,16 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
 			err = vxlan_fdb_create(vxlan, all_zeros_mac,
 					       &dst->remote_ip,
 					       NUD_REACHABLE | NUD_PERMANENT,
-					       NLM_F_CREATE | NLM_F_APPEND,
 					       vxlan->cfg.dst_port,
 					       dst->remote_vni,
 					       dst->remote_vni,
 					       dst->remote_ifindex,
-					       NTF_SELF);
+					       NTF_SELF, &f);
 			if (err) {
 				spin_unlock_bh(&vxlan->hash_lock);
 				return err;
 			}
+			vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
 		}
 		spin_unlock_bh(&vxlan->hash_lock);
 	}
 
diff --git a/drivers/staging/speakup/speakup_soft.c b/drivers/staging/speakup/speakup_soft.c
index d99daf69e501..fe229d63deec 100644
--- a/drivers/staging/speakup/speakup_soft.c
+++ b/drivers/staging/speakup/speakup_soft.c
@@ -207,11 +207,15 @@ static ssize_t softsynthx_read(struct file *fp, char __user *buf, size_t count,
 	int chars_sent = 0;
 	char __user *cp;
 	char *init;
+	size_t bytes_per_ch = unicode ? 3 : 1;
 	u16 ch;
 	int empty;
 	unsigned long flags;
 	DEFINE_WAIT(wait);
 
+	if (count < bytes_per_ch)
+		return -EINVAL;
+
 	spin_lock_irqsave(&speakup_info.spinlock, flags);
 	while (1) {
 		prepare_to_wait(&speakup_event, &wait, TASK_INTERRUPTIBLE);
@@ -237,7 +241,7 @@ static ssize_t softsynthx_read(struct file *fp, char __user *buf, size_t count,
 	init = get_initstring();
 
 	/* Keep 3 bytes available for a 16bit UTF-8-encoded character */
-	while (chars_sent <= count - 3) {
+	while (chars_sent <= count - bytes_per_ch) {
 		if (speakup_info.flushing) {
 			speakup_info.flushing = 0;
 			ch = '\x18';
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 3b9aadd007f5..f2f31fc16f29 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1844,6 +1844,9 @@ static const struct usb_device_id acm_ids[] = {
 	{ USB_DEVICE(0x09d8, 0x0320), /* Elatec GmbH TWN3 */
 	.driver_info = NO_UNION_NORMAL, /* has misplaced union descriptor */
 	},
+	{ USB_DEVICE(0x0ca6, 0xa050), /* Castles VEGA3000 */
+	.driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */
+	},
 
 	{ USB_DEVICE(0x2912, 0x0001), /* ATOL FPrint */
 	.driver_info = CLEAR_HALT_CONDITIONS,
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index e5f77e611451..a8bc48b26c23 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1141,10 +1141,14 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
 
 		if (!udev || udev->state == USB_STATE_NOTATTACHED) {
 			/* Tell hub_wq to disconnect the device or
-			 * check for a new connection
+			 * check for a new connection or over current condition.
+			 * Based on USB2.0 Spec Section 11.12.5,
+			 * C_PORT_OVER_CURRENT could be set while
+			 * PORT_OVER_CURRENT is not. So check for any of them.
 			 */
 			if (udev || (portstatus & USB_PORT_STAT_CONNECTION) ||
-			    (portstatus & USB_PORT_STAT_OVERCURRENT))
+			    (portstatus & USB_PORT_STAT_OVERCURRENT) ||
+			    (portchange & USB_PORT_STAT_C_OVERCURRENT))
 				set_bit(port1, hub->change_bits);
 
 		} else if (portstatus & USB_PORT_STAT_ENABLE) {
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index 87484f71b2ab..46d3b0fc00c5 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -2606,34 +2606,29 @@ static void dwc2_hc_init_xfer(struct dwc2_hsotg *hsotg,
 
 #define DWC2_USB_DMA_ALIGN 4
 
-struct dma_aligned_buffer {
-	void *kmalloc_ptr;
-	void *old_xfer_buffer;
-	u8 data[0];
-};
-
 static void dwc2_free_dma_aligned_buffer(struct urb *urb)
 {
-	struct dma_aligned_buffer *temp;
+	void *stored_xfer_buffer;
 
 	if (!(urb->transfer_flags & URB_ALIGNED_TEMP_BUFFER))
 		return;
 
-	temp = container_of(urb->transfer_buffer,
-			    struct dma_aligned_buffer, data);
+	/* Restore urb->transfer_buffer from the end of the allocated area */
+	memcpy(&stored_xfer_buffer, urb->transfer_buffer +
+	       urb->transfer_buffer_length, sizeof(urb->transfer_buffer));
 
 	if (usb_urb_dir_in(urb))
-		memcpy(temp->old_xfer_buffer, temp->data,
+		memcpy(stored_xfer_buffer, urb->transfer_buffer,
 		       urb->transfer_buffer_length);
-	urb->transfer_buffer = temp->old_xfer_buffer;
-	kfree(temp->kmalloc_ptr);
+	kfree(urb->transfer_buffer);
+	urb->transfer_buffer = stored_xfer_buffer;
 
 	urb->transfer_flags &= ~URB_ALIGNED_TEMP_BUFFER;
 }
 
 static int dwc2_alloc_dma_aligned_buffer(struct urb *urb, gfp_t mem_flags)
 {
-	struct dma_aligned_buffer *temp, *kmalloc_ptr;
+	void *kmalloc_ptr;
 	size_t kmalloc_size;
 
 	if (urb->num_sgs || urb->sg ||
@@ -2641,22 +2636,29 @@ static int dwc2_alloc_dma_aligned_buffer(struct urb *urb, gfp_t mem_flags)
 	    !((uintptr_t)urb->transfer_buffer & (DWC2_USB_DMA_ALIGN - 1)))
 		return 0;
 
-	/* Allocate a buffer with enough padding for alignment */
+	/*
+	 * Allocate a buffer with enough padding for original transfer_buffer
+	 * pointer. This allocation is guaranteed to be aligned properly for
+	 * DMA
+	 */
 	kmalloc_size = urb->transfer_buffer_length +
-		sizeof(struct dma_aligned_buffer) + DWC2_USB_DMA_ALIGN - 1;
+		       sizeof(urb->transfer_buffer);
 
 	kmalloc_ptr = kmalloc(kmalloc_size, mem_flags);
 	if (!kmalloc_ptr)
 		return -ENOMEM;
 
-	/* Position our struct dma_aligned_buffer such that data is aligned */
-	temp = PTR_ALIGN(kmalloc_ptr + 1, DWC2_USB_DMA_ALIGN) - 1;
-	temp->kmalloc_ptr = kmalloc_ptr;
-	temp->old_xfer_buffer = urb->transfer_buffer;
+	/*
+	 * Position value of original urb->transfer_buffer pointer to the end
+	 * of allocation for later referencing
+	 */
+	memcpy(kmalloc_ptr + urb->transfer_buffer_length,
+	       &urb->transfer_buffer, sizeof(urb->transfer_buffer));
+
 	if (usb_urb_dir_out(urb))
-		memcpy(temp->data, urb->transfer_buffer,
+		memcpy(kmalloc_ptr, urb->transfer_buffer,
 		       urb->transfer_buffer_length);
-	urb->transfer_buffer = temp->data;
+	urb->transfer_buffer = kmalloc_ptr;
 
 	urb->transfer_flags |= URB_ALIGNED_TEMP_BUFFER;
 
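The dwc2 hunk above replaces the wrapper struct with a plain allocation that stashes the caller's original pointer after the payload; the allocator's own alignment guarantee (kmalloc in the kernel) makes the manual PTR_ALIGN dance unnecessary. A userspace sketch of the same stash-and-restore trick (malloc stands in for kmalloc):

#include <assert.h>
#include <stdlib.h>
#include <string.h>

/* Allocate len bytes plus room to stash the caller's pointer at the end. */
static void *buf_wrap(void *orig, size_t len)
{
	void *p = malloc(len + sizeof(orig));

	if (!p)
		return NULL;
	memcpy((char *)p + len, &orig, sizeof(orig));
	return p;
}

/* Recover the stashed pointer and free the temporary buffer. */
static void *buf_unwrap(void *p, size_t len)
{
	void *orig;

	memcpy(&orig, (char *)p + len, sizeof(orig));
	free(p);
	return orig;
}

int main(void)
{
	char original[16];
	void *tmp = buf_wrap(original, sizeof(original));

	assert(tmp && buf_unwrap(tmp, sizeof(original)) == original);
	return 0;
}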
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 7b53ac548b1a..52e6897fa35a 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -3243,7 +3243,7 @@ static int ffs_func_setup(struct usb_function *f,
 	__ffs_event_add(ffs, FUNCTIONFS_SETUP);
 	spin_unlock_irqrestore(&ffs->ev.waitq.lock, flags);
 
-	return USB_GADGET_DELAYED_STATUS;
+	return creq->wLength == 0 ? USB_GADGET_DELAYED_STATUS : 0;
 }
 
 static bool ffs_func_req_match(struct usb_function *f,
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index b751dd60e41a..b4c68f3b82be 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -467,7 +467,7 @@ static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
 	if (!mem)
 		return -EINVAL;
 
-	ret = mm_iommu_ua_to_hpa(mem, tce, phpa);
+	ret = mm_iommu_ua_to_hpa(mem, tce, shift, phpa);
 	if (ret)
 		return -EINVAL;
 
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 0480cd9a9e81..71b81980787f 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -338,10 +338,7 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
 		return rc;
 
 	/* BB eventually switch this to SMB2 specific small buf size */
-	if (smb2_command == SMB2_SET_INFO)
-		*request_buf = cifs_buf_get();
-	else
-		*request_buf = cifs_small_buf_get();
+	*request_buf = cifs_small_buf_get();
 	if (*request_buf == NULL) {
 		/* BB should we add a retry in here if not a writepage? */
 		return -ENOMEM;
 	}
@@ -3171,7 +3168,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
 	}
 
 	rc = SendReceive2(xid, ses, iov, num, &resp_buftype, flags, &rsp_iov);
-	cifs_buf_release(req);
+	cifs_small_buf_release(req);
 	rsp = (struct smb2_set_info_rsp *)rsp_iov.iov_base;
 
 	if (rc != 0)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9cf971c68401..6dd77767fd5b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3167,6 +3167,8 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
 	return __skb_grow(skb, len);
 }
 
+#define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode)
+
 #define skb_queue_walk(queue, skb) \
 		for (skb = (queue)->next;					\
 		     skb != (struct sk_buff *)(queue);				\
diff --git a/include/net/tcp.h b/include/net/tcp.h
index fb653736f335..3173dd12b8cc 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -372,6 +372,7 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
 			struct pipe_inode_info *pipe, size_t len,
 			unsigned int flags);
 
+void tcp_enter_quickack_mode(struct sock *sk);
 static inline void tcp_dec_quickack_mode(struct sock *sk,
 					 const unsigned int pkts)
 {
@@ -560,6 +561,7 @@ void tcp_send_fin(struct sock *sk);
 void tcp_send_active_reset(struct sock *sk, gfp_t priority);
 int tcp_send_synack(struct sock *);
 void tcp_push_one(struct sock *, unsigned int mss_now);
+void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
 void tcp_send_ack(struct sock *sk);
 void tcp_send_delayed_ack(struct sock *sk);
 void tcp_send_loss_probe(struct sock *sk);
@@ -857,6 +859,11 @@ struct tcp_skb_cb {
 * as TCP moves IP6CB into a different location in skb->cb[]
 */
static inline int tcp_v6_iif(const struct sk_buff *skb)
+{
+	return TCP_SKB_CB(skb)->header.h6.iif;
+}
+
+static inline int tcp_v6_iif_l3_slave(const struct sk_buff *skb)
 {
 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4cfdad08aca0..efe396cc77b5 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2402,9 +2402,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
 			return err;
 	}
 
-	dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
-
-	__dev_notify_flags(dev, old_flags, ~0U);
+	if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
+		__dev_notify_flags(dev, old_flags, 0U);
+	} else {
+		dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
+		__dev_notify_flags(dev, old_flags, ~0U);
+	}
 	return 0;
 }
 EXPORT_SYMBOL(rtnl_configure_link);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 23041b5c0b27..2e5eeba97de9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3675,6 +3675,7 @@ normal:
 				net_warn_ratelimited(
 					"skb_segment: too many frags: %u %u\n",
 					pos, mss);
+				err = -EINVAL;
 				goto err;
 			}
 
@@ -3713,11 +3714,10 @@ skip_fraglist:
 
 perform_csum_check:
 		if (!csum) {
-			if (skb_has_shared_frag(nskb)) {
-				err = __skb_linearize(nskb);
-				if (err)
-					goto err;
-			}
+			if (skb_has_shared_frag(nskb) &&
+			    __skb_linearize(nskb))
+				goto err;
+
 			if (!nskb->remcsum_offload)
 				nskb->ip_summed = CHECKSUM_NONE;
 			SKB_GSO_CB(nskb)->csum =
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index fbeb35ad804b..502aae3e3ab8 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1201,8 +1201,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
 	if (pmc) {
 		im->interface = pmc->interface;
 		im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
-		im->sfmode = pmc->sfmode;
-		if (pmc->sfmode == MCAST_INCLUDE) {
+		if (im->sfmode == MCAST_INCLUDE) {
 			im->tomb = pmc->tomb;
 			im->sources = pmc->sources;
 			for (psf = im->sources; psf; psf = psf->sf_next)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 63d5d66e040a..e2dd325bed9b 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -523,6 +523,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->dev = from->dev;
 	to->mark = from->mark;
 
+	skb_copy_hash(to, from);
+
 	/* Copy the flags to each fragment. */
 	IPCB(to)->flags = IPCB(from)->flags;
 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index d07ba4d5917b..048d5f6dd320 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -148,15 +148,18 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
 {
 	struct sockaddr_in sin;
 	const struct iphdr *iph = ip_hdr(skb);
-	__be16 *ports = (__be16 *)skb_transport_header(skb);
+	__be16 *ports;
+	int end;
 
-	if (skb_transport_offset(skb) + 4 > (int)skb->len)
+	end = skb_transport_offset(skb) + 4;
+	if (end > 0 && !pskb_may_pull(skb, end))
 		return;
 
 	/* All current transport protocols have the port numbers in the
 	 * first four bytes of the transport header and this function is
 	 * written with this assumption in mind.
 	 */
+	ports = (__be16 *)skb_transport_header(skb);
 
 	sin.sin_family = AF_INET;
 	sin.sin_addr.s_addr = iph->daddr;
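The ip_sockglue hunk reads the 4-byte port pair only after verifying it actually lies inside the packet; the kernel uses pskb_may_pull(), which additionally pulls paged data into the linear area. A flat-buffer sketch of the same invariant (hypothetical helper, no skb semantics):

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool get_ports(const uint8_t *pkt, size_t len, size_t thoff,
		      uint16_t *sport, uint16_t *dport)
{
	if (thoff + 4 > len)	/* ports would run past the packet */
		return false;
	*sport = (uint16_t)(pkt[thoff] << 8 | pkt[thoff + 1]);
	*dport = (uint16_t)(pkt[thoff + 2] << 8 | pkt[thoff + 3]);
	return true;
}

int main(void)
{
	uint8_t pkt[24] = { [20] = 0x00, [21] = 0x35, [22] = 0xc0, [23] = 0x00 };
	uint16_t s, d;

	assert(get_ports(pkt, sizeof(pkt), 20, &s, &d) && s == 53);
	assert(!get_ports(pkt, 22, 20, &s, &d));	/* truncated packet */
	return 0;
}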
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 5f5e5936760e..c78fb53988a1 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -131,23 +131,14 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
 	struct dctcp *ca = inet_csk_ca(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	/* State has changed from CE=0 to CE=1 and delayed
-	 * ACK has not sent yet.
-	 */
-	if (!ca->ce_state && ca->delayed_ack_reserved) {
-		u32 tmp_rcv_nxt;
-
-		/* Save current rcv_nxt. */
-		tmp_rcv_nxt = tp->rcv_nxt;
-
-		/* Generate previous ack with CE=0. */
-		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
-		tp->rcv_nxt = ca->prior_rcv_nxt;
-
-		tcp_send_ack(sk);
-
-		/* Recover current rcv_nxt. */
-		tp->rcv_nxt = tmp_rcv_nxt;
+	if (!ca->ce_state) {
+		/* State has changed from CE=0 to CE=1, force an immediate
+		 * ACK to reflect the new CE state. If an ACK was delayed,
+		 * send that first to reflect the prior CE state.
+		 */
+		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+			__tcp_send_ack(sk, ca->prior_rcv_nxt);
+		tcp_enter_quickack_mode(sk);
 	}
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
@@ -161,23 +152,14 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
 	struct dctcp *ca = inet_csk_ca(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	/* State has changed from CE=1 to CE=0 and delayed
-	 * ACK has not sent yet.
-	 */
-	if (ca->ce_state && ca->delayed_ack_reserved) {
-		u32 tmp_rcv_nxt;
-
-		/* Save current rcv_nxt. */
-		tmp_rcv_nxt = tp->rcv_nxt;
-
-		/* Generate previous ack with CE=1. */
-		tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
-		tp->rcv_nxt = ca->prior_rcv_nxt;
-
-		tcp_send_ack(sk);
-
-		/* Recover current rcv_nxt. */
-		tp->rcv_nxt = tmp_rcv_nxt;
+	if (ca->ce_state) {
+		/* State has changed from CE=1 to CE=0, force an immediate
+		 * ACK to reflect the new CE state. If an ACK was delayed,
+		 * send that first to reflect the prior CE state.
+		 */
+		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+			__tcp_send_ack(sk, ca->prior_rcv_nxt);
+		tcp_enter_quickack_mode(sk);
 	}
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
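Both hunks above retire the old trick of temporarily rewriting tp->rcv_nxt around tcp_send_ack(). A hedged sketch of the shared shape (editorial; ce_state_changed() is a hypothetical helper, the patch open-codes this in both handlers):

	/* On a CE flip, first flush any pending delayed ACK so it still
	 * carries the previous CE state, acknowledging only up to
	 * prior_rcv_nxt; then enter quickack mode so the ACK carrying
	 * the new CE state is not itself delayed.
	 */
	static void ce_state_changed(struct sock *sk, u32 prior_rcv_nxt)
	{
		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
			__tcp_send_ack(sk, prior_rcv_nxt);
		tcp_enter_quickack_mode(sk);
	}

Because the ACK for old data is now built by __tcp_transmit_skb() with an explicit rcv_nxt argument, tp->rcv_nxt is never rewritten in place, and the ca->delayed_ack_reserved bookkeeping is no longer needed.
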
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5711b1b12d28..b86e7b8beb1d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -209,13 +209,14 @@ static void tcp_incr_quickack(struct sock *sk)
 		icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
 }
 
-static void tcp_enter_quickack_mode(struct sock *sk)
+void tcp_enter_quickack_mode(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
 	tcp_incr_quickack(sk);
 	icsk->icsk_ack.pingpong = 0;
 	icsk->icsk_ack.ato = TCP_ATO_MIN;
 }
+EXPORT_SYMBOL(tcp_enter_quickack_mode);
 
 /* Send ACKs quickly, if "quick" count is not exhausted
  * and the session is not interactive.
@@ -4331,6 +4332,23 @@ static bool tcp_try_coalesce(struct sock *sk,
 	return true;
 }
 
+static bool tcp_ooo_try_coalesce(struct sock *sk,
+			     struct sk_buff *to,
+			     struct sk_buff *from,
+			     bool *fragstolen)
+{
+	bool res = tcp_try_coalesce(sk, OOO_QUEUE, to, from, fragstolen);
+
+	/* In case tcp_drop() is called later, update to->gso_segs */
+	if (res) {
+		u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
+			       max_t(u16, 1, skb_shinfo(from)->gso_segs);
+
+		skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+	}
+	return res;
+}
+
 static void tcp_drop(struct sock *sk, struct sk_buff *skb)
 {
 	sk_drops_add(sk, skb);
@@ -4462,8 +4480,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	/* In the typical case, we are adding an skb to the end of the list.
 	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
 	 */
-	if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb,
-			     skb, &fragstolen)) {
+	if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
+				 skb, &fragstolen)) {
 coalesce_done:
 		tcp_grow_window(sk, skb);
 		kfree_skb_partial(skb, fragstolen);
@@ -4491,7 +4509,7 @@ coalesce_done:
 				/* All the bits are present. Drop. */
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPOFOMERGE);
-				__kfree_skb(skb);
+				tcp_drop(sk, skb);
 				skb = NULL;
 				tcp_dsack_set(sk, seq, end_seq);
 				goto add_sack;
@@ -4510,11 +4528,11 @@ coalesce_done:
 						 TCP_SKB_CB(skb1)->end_seq);
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPOFOMERGE);
-				__kfree_skb(skb1);
+				tcp_drop(sk, skb1);
 				goto merge_right;
 			}
-		} else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1,
-					    skb, &fragstolen)) {
+		} else if (tcp_ooo_try_coalesce(sk, skb1,
+						skb, &fragstolen)) {
 			goto coalesce_done;
 		}
 		p = &parent->rb_right;
@@ -4876,6 +4894,7 @@ end:
 static void tcp_collapse_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	u32 range_truesize, sum_tiny = 0;
 	struct sk_buff *skb, *head;
 	struct rb_node *p;
 	u32 start, end;
@@ -4894,6 +4913,7 @@ new_range:
 	}
 	start = TCP_SKB_CB(skb)->seq;
 	end = TCP_SKB_CB(skb)->end_seq;
+	range_truesize = skb->truesize;
 
 	for (head = skb;;) {
 		skb = tcp_skb_next(skb, NULL);
@@ -4904,11 +4924,20 @@ new_range:
 		if (!skb ||
 		    after(TCP_SKB_CB(skb)->seq, end) ||
 		    before(TCP_SKB_CB(skb)->end_seq, start)) {
-			tcp_collapse(sk, NULL, &tp->out_of_order_queue,
-				     head, skb, start, end);
+			/* Do not attempt collapsing tiny skbs */
+			if (range_truesize != head->truesize ||
+			    end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
+				tcp_collapse(sk, NULL, &tp->out_of_order_queue,
+					     head, skb, start, end);
+			} else {
+				sum_tiny += range_truesize;
+				if (sum_tiny > sk->sk_rcvbuf >> 3)
+					return;
+			}
 			goto new_range;
 		}
 
+		range_truesize += skb->truesize;
 		if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
 			start = TCP_SKB_CB(skb)->seq;
 		if (after(TCP_SKB_CB(skb)->end_seq, end))
@@ -4923,6 +4952,7 @@ new_range:
  * 2) not add too big latencies if thousands of packets sit there.
  *    (But if application shrinks SO_RCVBUF, we could still end up
  *     freeing whole queue here)
+ * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
  *
  * Return true if queue has shrunk.
  */
@@ -4930,20 +4960,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct rb_node *node, *prev;
+	int goal;
 
 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
 		return false;
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+	goal = sk->sk_rcvbuf >> 3;
 	node = &tp->ooo_last_skb->rbnode;
 	do {
 		prev = rb_prev(node);
 		rb_erase(node, &tp->out_of_order_queue);
+		goal -= rb_to_skb(node)->truesize;
 		tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
-		sk_mem_reclaim(sk);
-		if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
-		    !tcp_under_memory_pressure(sk))
-			break;
+		if (!prev || goal <= 0) {
+			sk_mem_reclaim(sk);
+			if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+			    !tcp_under_memory_pressure(sk))
+				break;
+			goal = sk->sk_rcvbuf >> 3;
+		}
 		node = prev;
 	} while (node);
 	tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
@@ -4978,6 +5014,9 @@ static int tcp_prune_queue(struct sock *sk)
 	else if (tcp_under_memory_pressure(sk))
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
+	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+		return 0;
+
 	tcp_collapse_ofo_queue(sk);
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		tcp_collapse(sk, &sk->sk_receive_queue, NULL,
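The pruning goal batches reclaim work: sk_rcvbuf >> 3 is one eighth (12.5 %) of the receive buffer, so sk_mem_reclaim() now runs once per batch of that much dropped truesize instead of after every skb. A worked example (editorial; values illustrative):

	int rcvbuf = 4 << 20;		/* sk->sk_rcvbuf: 4 MiB      */
	int goal   = rcvbuf >> 3;	/* 512 KiB per pruning batch */

With thousands of tiny out-of-order skbs queued, this bounds the number of reclaim passes, and together with the sum_tiny check in tcp_collapse_ofo_queue() it limits the time an attacker can make the kernel spend collapsing ranges that would free almost no memory.
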
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index abae5196cd3a..3d8f6f342cb1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -175,8 +175,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
 }
 
 /* Account for an ACK we sent. */
-static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
+static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
+				      u32 rcv_nxt)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (unlikely(rcv_nxt != tp->rcv_nxt))
+		return;  /* Special ACK sent by DCTCP to reflect ECN */
 	tcp_dec_quickack_mode(sk, pkts);
 	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 }
@@ -984,8 +989,8 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
  * We are working here with either a clone of the original
  * SKB, or a fresh unique copy made by the retransmit engine.
  */
-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
-			    gfp_t gfp_mask)
+static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
+			      int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct inet_sock *inet;
@@ -1057,7 +1062,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	th->source		= inet->inet_sport;
 	th->dest		= inet->inet_dport;
 	th->seq			= htonl(tcb->seq);
-	th->ack_seq		= htonl(tp->rcv_nxt);
+	th->ack_seq		= htonl(rcv_nxt);
 	*(((__be16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) |
 					tcb->tcp_flags);
 
@@ -1098,7 +1103,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	icsk->icsk_af_ops->send_check(sk, skb);
 
 	if (likely(tcb->tcp_flags & TCPHDR_ACK))
-		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
+		tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
 
 	if (skb->len != tcp_header_size) {
 		tcp_event_data_sent(tp, sk);
@@ -1135,6 +1140,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	return err;
 }
 
+static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+			    gfp_t gfp_mask)
+{
+	return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask,
+				  tcp_sk(sk)->rcv_nxt);
+}
+
 /* This routine just queues the buffer for sending.
  *
  * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
@@ -3551,7 +3563,7 @@ void tcp_send_delayed_ack(struct sock *sk)
 }
 
 /* This routine sends an ack and also updates the window. */
-void tcp_send_ack(struct sock *sk)
+void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
 {
 	struct sk_buff *buff;
 
@@ -3586,9 +3598,14 @@ void tcp_send_ack(struct sock *sk)
 	skb_set_tcp_pure_ack(buff);
 
 	/* Send it off, this clears delayed acks for us. */
-	tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0);
+	__tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt);
+}
+EXPORT_SYMBOL_GPL(__tcp_send_ack);
+
+void tcp_send_ack(struct sock *sk)
+{
+	__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
 }
-EXPORT_SYMBOL_GPL(tcp_send_ack);
 
 /* This routine sends a packet with an out of date sequence
  * number. It assumes the other end will try to ack it.
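The rcv_nxt parameter threaded through __tcp_transmit_skb() exists so the DCTCP ACK for old data cannot disturb delayed-ACK accounting. A sketch of the resulting call flow (editorial; names as introduced by this patch):

	/*
	 * __tcp_send_ack(sk, prior_rcv_nxt)
	 *   -> __tcp_transmit_skb(sk, buff, 0, 0, prior_rcv_nxt)
	 *        th->ack_seq = htonl(prior_rcv_nxt);
	 *        -> tcp_event_ack_sent(sk, pkts, prior_rcv_nxt)
	 *             rcv_nxt != tp->rcv_nxt, so return before
	 *             inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
	 */

Only an ACK that really covers everything received so far may cancel the ICSK_TIME_DACK timer; otherwise the bytes between prior_rcv_nxt and tp->rcv_nxt would lose their pending delayed ACK.
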
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 453dc3726199..461825e0680f 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -708,13 +708,16 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
 	}
 	if (np->rxopt.bits.rxorigdstaddr) {
 		struct sockaddr_in6 sin6;
-		__be16 *ports = (__be16 *) skb_transport_header(skb);
+		__be16 *ports;
+		int end;
 
-		if (skb_transport_offset(skb) + 4 <= (int)skb->len) {
+		end = skb_transport_offset(skb) + 4;
+		if (end <= 0 || pskb_may_pull(skb, end)) {
 			/* All current transport protocols have the port numbers in the
 			 * first four bytes of the transport header and this function is
 			 * written with this assumption in mind.
 			 */
+			ports = (__be16 *)skb_transport_header(skb);
 
 			sin6.sin6_family = AF_INET6;
 			sin6.sin6_addr = ipv6_hdr(skb)->daddr;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 5acb54405b10..c5f2b17b7ee1 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -405,9 +405,10 @@ static int icmp6_iif(const struct sk_buff *skb)
 
 	/* for local traffic to local address, skb dev is the loopback
 	 * device. Check if there is a dst attached to the skb and if so
-	 * get the real device index.
+	 * get the real device index. Same is needed for replies to a link
+	 * local address on a device enslaved to an L3 master device
 	 */
-	if (unlikely(iif == LOOPBACK_IFINDEX)) {
+	if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
 		const struct rt6_info *rt6 = skb_rt6_info(skb);
 
 		if (rt6)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 32fcce711855..1da021527fcd 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -595,6 +595,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->dev = from->dev;
 	to->mark = from->mark;
 
+	skb_copy_hash(to, from);
+
 #ifdef CONFIG_NET_SCHED
 	to->tc_index = from->tc_index;
 #endif
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 9a38a2c641fa..6fd913d63835 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -771,8 +771,7 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 	if (pmc) {
 		im->idev = pmc->idev;
 		im->mca_crcount = idev->mc_qrv;
-		im->mca_sfmode = pmc->mca_sfmode;
-		if (pmc->mca_sfmode == MCAST_INCLUDE) {
+		if (im->mca_sfmode == MCAST_INCLUDE) {
 			im->mca_tomb = pmc->mca_tomb;
 			im->mca_sources = pmc->mca_sources;
 			for (psf = im->mca_sources; psf; psf = psf->sf_next)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 35e8aef9ceed..ba8586aadffa 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -918,7 +918,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
 					   &tcp_hashinfo, NULL, 0,
 					   &ipv6h->saddr,
 					   th->source, &ipv6h->daddr,
-					   ntohs(th->source), tcp_v6_iif(skb),
+					   ntohs(th->source),
+					   tcp_v6_iif_l3_slave(skb),
 					   tcp_v6_sdif(skb));
 		if (!sk1)
 			goto out;
@@ -1573,7 +1574,8 @@ do_time_wait:
 					    skb, __tcp_hdrlen(th),
 					    &ipv6_hdr(skb)->saddr, th->source,
 					    &ipv6_hdr(skb)->daddr,
-					    ntohs(th->dest), tcp_v6_iif(skb),
+					    ntohs(th->dest),
+					    tcp_v6_iif_l3_slave(skb),
 					    sdif);
 		if (sk2) {
 			struct inet_timewait_sock *tw = inet_twsk(sk);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 8ee4e667a414..fb79caf56d0e 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -135,9 +135,10 @@ static int alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
 		pfrag->offset += use;
 
 		sge = sg + num_elem - 1;
-		if (num_elem > first_coalesce && sg_page(sg) == pfrag->page &&
-		    sg->offset + sg->length == orig_offset) {
-			sg->length += use;
+
+		if (num_elem > first_coalesce && sg_page(sge) == pfrag->page &&
+		    sge->offset + sge->length == orig_offset) {
+			sge->length += use;
 		} else {
 			sge++;
 			sg_unmark_end(sge);
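The tls_sw.c fix matters because alloc_sg() coalesces into the last in-use scatterlist entry: the old code compared the page and offset of sg, the first entry, so growth could be applied to the wrong element once num_elem advanced. A standalone sketch of the intended invariant (editorial; simplified types, not the kernel's scatterlist API):

	struct page;

	struct chunk {
		struct page *page;
		unsigned int offset;
		unsigned int length;
	};

	/* Append [offset, offset+len) of @page, merging into the *last*
	 * entry when it ends exactly where the new data begins.
	 */
	static void append_or_coalesce(struct chunk *arr, unsigned int *nelems,
				       struct page *page, unsigned int offset,
				       unsigned int len)
	{
		if (*nelems) {
			struct chunk *last = &arr[*nelems - 1];

			if (last->page == page &&
			    last->offset + last->length == offset) {
				last->length += len;	/* grow tail entry */
				return;
			}
		}
		arr[*nelems] = (struct chunk){ page, offset, len };
		(*nelems)++;
	}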