diff --git a/Makefile b/Makefile index 86d227774ae6..22c91fa0411e 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ VERSION = 3 PATCHLEVEL = 14 -SUBLEVEL = 53 +SUBLEVEL = 54 EXTRAVERSION = -NAME = Remembering Coco +NAME = Kernel Recipes 2015 # *DOCUMENTATION* # To see a list of typical targets execute "make help" diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 65b788410bd9..9a406627b0ef 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -68,6 +68,10 @@ config NO_IOPORT config STACKTRACE_SUPPORT def_bool y +config ILLEGAL_POINTER_VALUE + hex + default 0xdead000000000000 + config LOCKDEP_SUPPORT def_bool y @@ -302,6 +306,22 @@ menu "CPU Power Management" source "drivers/cpuidle/Kconfig" +config ARM64_ERRATUM_843419 + bool "Cortex-A53: 843419: A load or store might access an incorrect address" + depends on MODULES + default y + help + This option builds kernel modules using the large memory model in + order to avoid the use of the ADRP instruction, which can cause + a subsequent memory access to use an incorrect address on Cortex-A53 + parts up to r0p4. + + Note that the kernel itself must be linked with a version of ld + which fixes potentially affected ADRP instructions through the + use of veneers. + + If unsure, say Y. + endmenu source "net/Kconfig" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 2fceb71ac3b7..0ab1a34dab58 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -34,6 +34,10 @@ comma = , CHECKFLAGS += -D__aarch64__ +ifeq ($(CONFIG_ARM64_ERRATUM_843419), y) +CFLAGS_MODULE += -mcmodel=large +endif + # Default value head-y := arch/arm64/kernel/head.o diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 0b281fffda51..150492b6cd02 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -203,6 +203,11 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr hstr_el2, xzr // Disable CP15 traps to EL2 #endif + /* EL2 debug */ + mrs x0, pmcr_el0 // Disable debug access traps + ubfx x0, x0, #11, #5 // to EL2 and allow access to + msr mdcr_el2, x0 // all PMU counters from EL1 + /* Stage-2 translation */ msr vttbr_el2, xzr diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 1eb1cc955139..e366329d96d8 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -330,12 +330,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, AARCH64_INSN_IMM_ADR); break; +#ifndef CONFIG_ARM64_ERRATUM_843419 case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; case R_AARCH64_ADR_PREL_PG_HI21: ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, AARCH64_INSN_IMM_ADR); break; +#endif case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: overflow_check = false; diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index a966baccf1c0..cbe646fa340b 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -203,14 +203,32 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) /* * VFP save/restore code. + * + * We have to be careful with endianness, since the fpsimd context-switch + * code operates on 128-bit (Q) register values whereas the compat ABI + * uses an array of 64-bit (D) registers. Consequently, we need to swap + * the two halves of each Q register when running on a big-endian CPU. */ +union __fpsimd_vreg { + __uint128_t raw; + struct { +#ifdef __AARCH64EB__ + u64 hi; + u64 lo; +#else + u64 lo; + u64 hi; +#endif + }; +}; + static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) { struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state; compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr, fpexc; - int err = 0; + int i, err = 0; /* * Save the hardware registers to the fpsimd_state structure. @@ -226,10 +244,15 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) /* * Now copy the FP registers. Since the registers are packed, * we can copy the prefix we want (V0-V15) as it is. - * FIXME: Won't work if big endian. */ - err |= __copy_to_user(&frame->ufp.fpregs, fpsimd->vregs, - sizeof(frame->ufp.fpregs)); + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg = { + .raw = fpsimd->vregs[i >> 1], + }; + + __put_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __put_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + } /* Create an AArch32 fpscr from the fpsr and the fpcr. */ fpscr = (fpsimd->fpsr & VFP_FPSCR_STAT_MASK) | @@ -254,7 +277,7 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr; - int err = 0; + int i, err = 0; __get_user_error(magic, &frame->magic, err); __get_user_error(size, &frame->size, err); @@ -264,12 +287,14 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) return -EINVAL; - /* - * Copy the FP registers into the start of the fpsimd_state. - * FIXME: Won't work if big endian. - */ - err |= __copy_from_user(fpsimd.vregs, frame->ufp.fpregs, - sizeof(frame->ufp.fpregs)); + /* Copy the FP registers into the start of the fpsimd_state. */ + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg; + + __get_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __get_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + fpsimd.vregs[i >> 1] = vreg.raw; + } /* Extract the fpsr and the fpcr from the fpscr */ __get_user_error(fpscr, &frame->ufp.fpscr, err); diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 3aaf3bc4ad8a..1343b2020891 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -485,8 +485,6 @@ CPU_BE( rev w5, w5 ) mrs x3, cntv_ctl_el0 and x3, x3, #3 str w3, [x0, #VCPU_TIMER_CNTV_CTL] - bic x3, x3, #1 // Clear Enable - msr cntv_ctl_el0, x3 isb @@ -494,6 +492,9 @@ CPU_BE( rev w5, w5 ) str x3, [x0, #VCPU_TIMER_CNTV_CVAL] 1: + // Disable the virtual timer + msr cntv_ctl_el0, xzr + // Allow physical timer/counter access for the host mrs x2, cnthctl_el2 orr x2, x2, #3 diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 8ceac4785609..1ce320e4d3d8 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -507,8 +507,8 @@ void do_cpu_irq_mask(struct pt_regs *regs) struct pt_regs *old_regs; unsigned long eirr_val; int irq, cpu = smp_processor_id(); -#ifdef CONFIG_SMP struct irq_desc *desc; +#ifdef CONFIG_SMP cpumask_t dest; #endif @@ -521,8 +521,12 @@ void do_cpu_irq_mask(struct pt_regs *regs) goto set_out; irq = eirr_to_irq(eirr_val); -#ifdef CONFIG_SMP + /* Filter out spurious interrupts, mostly from serial port at bootup */ desc = irq_to_desc(irq); + if (unlikely(!desc->action)) + goto set_out; + +#ifdef CONFIG_SMP cpumask_copy(&dest, desc->irq_data.affinity); if (irqd_is_per_cpu(&desc->irq_data) && !cpu_isset(smp_processor_id(), dest)) { diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 7ef22e3387e0..0b8d26d3ba43 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -821,7 +821,7 @@ cas2_action: /* 64bit CAS */ #ifdef CONFIG_64BIT 19: ldd,ma 0(%sr3,%r26), %r29 - sub,= %r29, %r25, %r0 + sub,*= %r29, %r25, %r0 b,n cas2_end 20: std,ma %r24, 0(%sr3,%r26) copy %r0, %r28 diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 7b3d54fae46f..7356053b1133 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -135,7 +135,19 @@ #define pte_iterate_hashed_end() } while(0) #ifdef CONFIG_PPC_HAS_HASH_64K -#define pte_pagesize_index(mm, addr, pte) get_slice_psize(mm, addr) +/* + * We expect this to be called only for user addresses or kernel virtual + * addresses other than the linear mapping. + */ +#define pte_pagesize_index(mm, addr, pte) \ + ({ \ + unsigned int psize; \ + if (is_kernel_addr(addr)) \ + psize = MMU_PAGE_4K; \ + else \ + psize = get_slice_psize(mm, addr); \ + psize; \ + }) #else #define pte_pagesize_index(mm, addr, pte) MMU_PAGE_4K #endif diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 9bd52c65e66f..14de1385dedb 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -255,6 +255,7 @@ extern void rtas_power_off(void); extern void rtas_halt(void); extern void rtas_os_term(char *str); extern int rtas_get_sensor(int sensor, int index, int *state); +extern int rtas_get_sensor_fast(int sensor, int index, int *state); extern int rtas_get_power_level(int powerdomain, int *level); extern int rtas_set_power_level(int powerdomain, int level, int *setlevel); extern bool rtas_indicator_present(int token, int *maxindex); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 4cf674d7d5ae..c4bc8d6cfd79 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -584,6 +584,23 @@ int rtas_get_sensor(int sensor, int index, int *state) } EXPORT_SYMBOL(rtas_get_sensor); +int rtas_get_sensor_fast(int sensor, int index, int *state) +{ + int token = rtas_token("get-sensor-state"); + int rc; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + rc = rtas_call(token, 2, 2, state, sensor, index); + WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN && + rc <= RTAS_EXTENDED_DELAY_MAX)); + + if (rc < 0) + return rtas_error_rc(rc); + return rc; +} + bool rtas_indicator_present(int token, int *maxindex) { int proplen, count, i; diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 5f5e6328c21c..5061c6f676da 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -136,7 +136,6 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, BUG_ON(index >= 4096); vpn = hpt_vpn(ea, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); hpte_slot_array = get_hpte_slot_array(pmdp); if (psize == MMU_PAGE_4K) { /* @@ -151,6 +150,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, valid = hpte_valid(hpte_slot_array, index); if (valid) { /* update the hpte bits */ + hash = hpt_hash(vpn, shift, ssize); hidx = hpte_hash_index(hpte_slot_array, index); if (hidx & _PTEIDX_SECONDARY) hash = ~hash; @@ -176,6 +176,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, if (!valid) { unsigned long hpte_group; + hash = hpt_hash(vpn, shift, ssize); /* insert new entry */ pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; new_pmd |= _PAGE_HASHPTE; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 721c0586b284..50fd3ac7b7bf 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -187,7 +187,8 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) int state; int critical; - status = rtas_get_sensor(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); + status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, + &state); if (state > 3) critical = 1; /* Time Critical */ diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 06469ee0f26e..6d6ab2b0bdfa 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1702,11 +1702,12 @@ ENTRY(nmi) * If the variable is not set and the stack is not the NMI * stack then: * o Set the special variable on the stack - * o Copy the interrupt frame into a "saved" location on the stack - * o Copy the interrupt frame into a "copy" location on the stack + * o Copy the interrupt frame into an "outermost" location on the + * stack + * o Copy the interrupt frame into an "iret" location on the stack * o Continue processing the NMI * If the variable is set or the previous stack is the NMI stack: - * o Modify the "copy" location to jump to the repeate_nmi + * o Modify the "iret" location to jump to the repeat_nmi * o return back to the first NMI * * Now on exit of the first NMI, we first clear the stack variable @@ -1715,52 +1716,184 @@ ENTRY(nmi) * a nested NMI that updated the copy interrupt stack frame, a * jump will be made to the repeat_nmi code that will handle the second * NMI. + * + * However, espfix prevents us from directly returning to userspace + * with a single IRET instruction. Similarly, IRET to user mode + * can fault. We therefore handle NMIs from user space like + * other IST entries. */ /* Use %rdx as out temp variable throughout */ pushq_cfi %rdx CFI_REL_OFFSET rdx, 0 + testb $3, CS-RIP+8(%rsp) + jz .Lnmi_from_kernel + + /* + * NMI from user mode. We need to run on the thread stack, but we + * can't go through the normal entry paths: NMIs are masked, and + * we don't want to enable interrupts, because then we'll end + * up in an awkward situation in which IRQs are on but NMIs + * are off. + */ + SWAPGS + cld + movq %rsp, %rdx + movq PER_CPU_VAR(kernel_stack), %rsp + addq $KERNEL_STACK_OFFSET, %rsp + pushq 5*8(%rdx) /* pt_regs->ss */ + pushq 4*8(%rdx) /* pt_regs->rsp */ + pushq 3*8(%rdx) /* pt_regs->flags */ + pushq 2*8(%rdx) /* pt_regs->cs */ + pushq 1*8(%rdx) /* pt_regs->rip */ + pushq $-1 /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + pushq (%rdx) /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq %rax /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 */ + pushq %r9 /* pt_regs->r9 */ + pushq %r10 /* pt_regs->r10 */ + pushq %r11 /* pt_regs->r11 */ + pushq %rbx /* pt_regs->rbx */ + pushq %rbp /* pt_regs->rbp */ + pushq %r12 /* pt_regs->r12 */ + pushq %r13 /* pt_regs->r13 */ + pushq %r14 /* pt_regs->r14 */ + pushq %r15 /* pt_regs->r15 */ + + /* + * At this point we no longer need to worry about stack damage + * due to nesting -- we're on the normal thread stack and we're + * done with the NMI stack. + */ + movq %rsp, %rdi + movq $-1, %rsi + call do_nmi + + /* + * Return back to user mode. We must *not* do the normal exit + * work, because we don't want to enable interrupts. Fortunately, + * do_nmi doesn't modify pt_regs. + */ + SWAPGS + + /* + * Open-code the entire return process for compatibility with varying + * register layouts across different kernel versions. + */ + addq $6*8, %rsp /* skip bx, bp, and r12-r15 */ + popq %r11 /* pt_regs->r11 */ + popq %r10 /* pt_regs->r10 */ + popq %r9 /* pt_regs->r9 */ + popq %r8 /* pt_regs->r8 */ + popq %rax /* pt_regs->ax */ + popq %rcx /* pt_regs->cx */ + popq %rdx /* pt_regs->dx */ + popq %rsi /* pt_regs->si */ + popq %rdi /* pt_regs->di */ + addq $8, %rsp /* skip orig_ax */ + INTERRUPT_RETURN + +.Lnmi_from_kernel: /* - * If %cs was not the kernel segment, then the NMI triggered in user - * space, which means it is definitely not nested. + * Here's what our stack frame will look like: + * +---------------------------------------------------------+ + * | original SS | + * | original Return RSP | + * | original RFLAGS | + * | original CS | + * | original RIP | + * +---------------------------------------------------------+ + * | temp storage for rdx | + * +---------------------------------------------------------+ + * | "NMI executing" variable | + * +---------------------------------------------------------+ + * | iret SS } Copied from "outermost" frame | + * | iret Return RSP } on each loop iteration; overwritten | + * | iret RFLAGS } by a nested NMI to force another | + * | iret CS } iteration if needed. | + * | iret RIP } | + * +---------------------------------------------------------+ + * | outermost SS } initialized in first_nmi; | + * | outermost Return RSP } will not be changed before | + * | outermost RFLAGS } NMI processing is done. | + * | outermost CS } Copied to "iret" frame on each | + * | outermost RIP } iteration. | + * +---------------------------------------------------------+ + * | pt_regs | + * +---------------------------------------------------------+ + * + * The "original" frame is used by hardware. Before re-enabling + * NMIs, we need to be done with it, and we need to leave enough + * space for the asm code here. + * + * We return by executing IRET while RSP points to the "iret" frame. + * That will either return for real or it will loop back into NMI + * processing. + * + * The "outermost" frame is copied to the "iret" frame on each + * iteration of the loop, so each iteration starts with the "iret" + * frame pointing to the final return target. */ - cmpl $__KERNEL_CS, 16(%rsp) - jne first_nmi /* - * Check the special variable on the stack to see if NMIs are - * executing. + * Determine whether we're a nested NMI. + * + * If we interrupted kernel code between repeat_nmi and + * end_repeat_nmi, then we are a nested NMI. We must not + * modify the "iret" frame because it's being written by + * the outer NMI. That's okay; the outer NMI handler is + * about to about to call do_nmi anyway, so we can just + * resume the outer NMI. + */ + movq $repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja 1f + movq $end_repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja nested_nmi_out +1: + + /* + * Now check "NMI executing". If it's set, then we're nested. + * This will not detect if we interrupted an outer NMI just + * before IRET. */ cmpl $1, -8(%rsp) je nested_nmi /* - * Now test if the previous stack was an NMI stack. - * We need the double check. We check the NMI stack to satisfy the - * race when the first NMI clears the variable before returning. - * We check the variable because the first NMI could be in a - * breakpoint routine using a breakpoint stack. + * Now test if the previous stack was an NMI stack. This covers + * the case where we interrupt an outer NMI after it clears + * "NMI executing" but before IRET. We need to be careful, though: + * there is one case in which RSP could point to the NMI stack + * despite there being no NMI active: naughty userspace controls + * RSP at the very beginning of the SYSCALL targets. We can + * pull a fast one on naughty userspace, though: we program + * SYSCALL to mask DF, so userspace cannot cause DF to be set + * if it controls the kernel's RSP. We set DF before we clear + * "NMI executing". */ lea 6*8(%rsp), %rdx test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi + + /* Ah, it is within the NMI stack. */ + + testb $(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp) + jz first_nmi /* RSP was user controlled. */ + + /* This is a nested NMI. */ + CFI_REMEMBER_STATE nested_nmi: /* - * Do nothing if we interrupted the fixup in repeat_nmi. - * It's about to repeat the NMI handler, so we are fine - * with ignoring this one. + * Modify the "iret" frame to point to repeat_nmi, forcing another + * iteration of NMI handling. */ - movq $repeat_nmi, %rdx - cmpq 8(%rsp), %rdx - ja 1f - movq $end_repeat_nmi, %rdx - cmpq 8(%rsp), %rdx - ja nested_nmi_out - -1: - /* Set up the interrupted NMIs stack to jump to repeat_nmi */ leaq -1*8(%rsp), %rdx movq %rdx, %rsp CFI_ADJUST_CFA_OFFSET 1*8 @@ -1779,60 +1912,23 @@ nested_nmi_out: popq_cfi %rdx CFI_RESTORE rdx - /* No need to check faults here */ + /* We are returning to kernel mode, so this cannot result in a fault. */ INTERRUPT_RETURN CFI_RESTORE_STATE first_nmi: - /* - * Because nested NMIs will use the pushed location that we - * stored in rdx, we must keep that space available. - * Here's what our stack frame will look like: - * +-------------------------+ - * | original SS | - * | original Return RSP | - * | original RFLAGS | - * | original CS | - * | original RIP | - * +-------------------------+ - * | temp storage for rdx | - * +-------------------------+ - * | NMI executing variable | - * +-------------------------+ - * | copied SS | - * | copied Return RSP | - * | copied RFLAGS | - * | copied CS | - * | copied RIP | - * +-------------------------+ - * | Saved SS | - * | Saved Return RSP | - * | Saved RFLAGS | - * | Saved CS | - * | Saved RIP | - * +-------------------------+ - * | pt_regs | - * +-------------------------+ - * - * The saved stack frame is used to fix up the copied stack frame - * that a nested NMI may change to make the interrupted NMI iret jump - * to the repeat_nmi. The original stack frame and the temp storage - * is also used by nested NMIs and can not be trusted on exit. - */ - /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ + /* Restore rdx. */ movq (%rsp), %rdx CFI_RESTORE rdx - /* Set the NMI executing variable on the stack. */ + /* Set "NMI executing" on the stack. */ pushq_cfi $1 - /* - * Leave room for the "copied" frame - */ + /* Leave room for the "iret" frame */ subq $(5*8), %rsp CFI_ADJUST_CFA_OFFSET 5*8 - /* Copy the stack frame to the Saved frame */ + /* Copy the "original" frame to the "outermost" frame */ .rept 5 pushq_cfi 11*8(%rsp) .endr @@ -1840,6 +1936,7 @@ first_nmi: /* Everything up to here is safe from nested NMIs */ +repeat_nmi: /* * If there was a nested NMI, the first NMI's iret will return * here. But NMIs are still enabled and we can take another @@ -1848,16 +1945,21 @@ first_nmi: * it will just return, as we are about to repeat an NMI anyway. * This makes it safe to copy to the stack frame that a nested * NMI will update. - */ -repeat_nmi: - /* - * Update the stack variable to say we are still in NMI (the update - * is benign for the non-repeat case, where 1 was pushed just above - * to this very stack slot). + * + * RSP is pointing to "outermost RIP". gsbase is unknown, but, if + * we're repeating an NMI, gsbase has the same value that it had on + * the first iteration. paranoid_entry will load the kernel + * gsbase if needed before we call do_nmi. + * + * Set "NMI executing" in case we came back here via IRET. */ movq $1, 10*8(%rsp) - /* Make another copy, this one may be modified by nested NMIs */ + /* + * Copy the "outermost" frame to the "iret" frame. NMIs that nest + * here must not modify the "iret" frame while we're writing to + * it or it will end up containing garbage. + */ addq $(10*8), %rsp CFI_ADJUST_CFA_OFFSET -10*8 .rept 5 @@ -1868,9 +1970,9 @@ repeat_nmi: end_repeat_nmi: /* - * Everything below this point can be preempted by a nested - * NMI if the first NMI took an exception and reset our iret stack - * so that we repeat another NMI. + * Everything below this point can be preempted by a nested NMI. + * If this happens, then the inner NMI will change the "iret" + * frame to point back to repeat_nmi. */ pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ subq $ORIG_RAX-R15, %rsp @@ -1885,28 +1987,10 @@ end_repeat_nmi: call save_paranoid DEFAULT_FRAME 0 - /* - * Save off the CR2 register. If we take a page fault in the NMI then - * it could corrupt the CR2 value. If the NMI preempts a page fault - * handler before it was able to read the CR2 register, and then the - * NMI itself takes a page fault, the page fault that was preempted - * will read the information from the NMI page fault and not the - * origin fault. Save it off and restore it if it changes. - * Use the r12 callee-saved register. - */ - movq %cr2, %r12 - /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ movq %rsp,%rdi movq $-1,%rsi call do_nmi - - /* Did the NMI take a page fault? Restore cr2 if it did */ - movq %cr2, %rcx - cmpq %rcx, %r12 - je 1f - movq %r12, %cr2 -1: testl %ebx,%ebx /* swapgs needed? */ jnz nmi_restore @@ -1916,9 +2000,23 @@ nmi_restore: /* Pop the extra iret frame at once */ RESTORE_ALL 6*8 - /* Clear the NMI executing stack variable */ - movq $0, 5*8(%rsp) - jmp irq_return + /* + * Clear "NMI executing". Set DF first so that we can easily + * distinguish the remaining code between here and IRET from + * the SYSCALL entry and exit paths. On a native kernel, we + * could just inspect RIP, but, on paravirt kernels, + * INTERRUPT_RETURN can translate into a jump into a + * hypercall page. + */ + std + movq $0, 5*8(%rsp) /* clear "NMI executing" */ + + /* + * INTERRUPT_RETURN reads the "iret" frame and exits the NMI + * stack in a single instruction. We are returning to kernel + * mode, so this cannot result in a fault. + */ + INTERRUPT_RETURN CFI_ENDPROC END(nmi) diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 6fcb49ce50a1..8facfb318a97 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -392,15 +392,15 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) } /* - * NMIs can hit breakpoints which will cause it to lose its - * NMI context with the CPU when the breakpoint does an iret. - */ -#ifdef CONFIG_X86_32 -/* - * For i386, NMIs use the same stack as the kernel, and we can - * add a workaround to the iret problem in C (preventing nested - * NMIs if an NMI takes a trap). Simply have 3 states the NMI - * can be in: + * NMIs can page fault or hit breakpoints which will cause it to lose + * its NMI context with the CPU when the breakpoint or page fault does an IRET. + * + * As a result, NMIs can nest if NMIs get unmasked due an IRET during + * NMI processing. On x86_64, the asm glue protects us from nested NMIs + * if the outer NMI came from kernel mode, but we can still nest if the + * outer NMI came from user mode. + * + * To handle these nested NMIs, we have three states: * * 1) not running * 2) executing @@ -414,15 +414,14 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) * (Note, the latch is binary, thus multiple NMIs triggering, * when one is running, are ignored. Only one NMI is restarted.) * - * If an NMI hits a breakpoint that executes an iret, another - * NMI can preempt it. We do not want to allow this new NMI - * to run, but we want to execute it when the first one finishes. - * We set the state to "latched", and the exit of the first NMI will - * perform a dec_return, if the result is zero (NOT_RUNNING), then - * it will simply exit the NMI handler. If not, the dec_return - * would have set the state to NMI_EXECUTING (what we want it to - * be when we are running). In this case, we simply jump back - * to rerun the NMI handler again, and restart the 'latched' NMI. + * If an NMI executes an iret, another NMI can preempt it. We do not + * want to allow this new NMI to run, but we want to execute it when the + * first one finishes. We set the state to "latched", and the exit of + * the first NMI will perform a dec_return, if the result is zero + * (NOT_RUNNING), then it will simply exit the NMI handler. If not, the + * dec_return would have set the state to NMI_EXECUTING (what we want it + * to be when we are running). In this case, we simply jump back to + * rerun the NMI handler again, and restart the 'latched' NMI. * * No trap (breakpoint or page fault) should be hit before nmi_restart, * thus there is no race between the first check of state for NOT_RUNNING @@ -445,49 +444,36 @@ enum nmi_states { static DEFINE_PER_CPU(enum nmi_states, nmi_state); static DEFINE_PER_CPU(unsigned long, nmi_cr2); -#define nmi_nesting_preprocess(regs) \ - do { \ - if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { \ - this_cpu_write(nmi_state, NMI_LATCHED); \ - return; \ - } \ - this_cpu_write(nmi_state, NMI_EXECUTING); \ - this_cpu_write(nmi_cr2, read_cr2()); \ - } while (0); \ - nmi_restart: - -#define nmi_nesting_postprocess() \ - do { \ - if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) \ - write_cr2(this_cpu_read(nmi_cr2)); \ - if (this_cpu_dec_return(nmi_state)) \ - goto nmi_restart; \ - } while (0) -#else /* x86_64 */ +#ifdef CONFIG_X86_64 /* - * In x86_64 things are a bit more difficult. This has the same problem - * where an NMI hitting a breakpoint that calls iret will remove the - * NMI context, allowing a nested NMI to enter. What makes this more - * difficult is that both NMIs and breakpoints have their own stack. - * When a new NMI or breakpoint is executed, the stack is set to a fixed - * point. If an NMI is nested, it will have its stack set at that same - * fixed address that the first NMI had, and will start corrupting the - * stack. This is handled in entry_64.S, but the same problem exists with - * the breakpoint stack. + * In x86_64, we need to handle breakpoint -> NMI -> breakpoint. Without + * some care, the inner breakpoint will clobber the outer breakpoint's + * stack. * - * If a breakpoint is being processed, and the debug stack is being used, - * if an NMI comes in and also hits a breakpoint, the stack pointer - * will be set to the same fixed address as the breakpoint that was - * interrupted, causing that stack to be corrupted. To handle this case, - * check if the stack that was interrupted is the debug stack, and if - * so, change the IDT so that new breakpoints will use the current stack - * and not switch to the fixed address. On return of the NMI, switch back - * to the original IDT. + * If a breakpoint is being processed, and the debug stack is being + * used, if an NMI comes in and also hits a breakpoint, the stack + * pointer will be set to the same fixed address as the breakpoint that + * was interrupted, causing that stack to be corrupted. To handle this + * case, check if the stack that was interrupted is the debug stack, and + * if so, change the IDT so that new breakpoints will use the current + * stack and not switch to the fixed address. On return of the NMI, + * switch back to the original IDT. */ static DEFINE_PER_CPU(int, update_debug_stack); +#endif -static inline void nmi_nesting_preprocess(struct pt_regs *regs) +dotraplinkage notrace void +do_nmi(struct pt_regs *regs, long error_code) { + if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { + this_cpu_write(nmi_state, NMI_LATCHED); + return; + } + this_cpu_write(nmi_state, NMI_EXECUTING); + this_cpu_write(nmi_cr2, read_cr2()); +nmi_restart: + +#ifdef CONFIG_X86_64 /* * If we interrupted a breakpoint, it is possible that * the nmi handler will have breakpoints too. We need to @@ -498,22 +484,8 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs) debug_stack_set_zero(); this_cpu_write(update_debug_stack, 1); } -} - -static inline void nmi_nesting_postprocess(void) -{ - if (unlikely(this_cpu_read(update_debug_stack))) { - debug_stack_reset(); - this_cpu_write(update_debug_stack, 0); - } -} #endif -dotraplinkage notrace __kprobes void -do_nmi(struct pt_regs *regs, long error_code) -{ - nmi_nesting_preprocess(regs); - nmi_enter(); inc_irq_stat(__nmi_count); @@ -523,8 +495,17 @@ do_nmi(struct pt_regs *regs, long error_code) nmi_exit(); - /* On i386, may loop back to preprocess */ - nmi_nesting_postprocess(); +#ifdef CONFIG_X86_64 + if (unlikely(this_cpu_read(update_debug_stack))) { + debug_stack_reset(); + this_cpu_write(update_debug_stack, 0); + } +#endif + + if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) + write_cr2(this_cpu_read(nmi_cr2)); + if (this_cpu_dec_return(nmi_state)) + goto nmi_restart; } void stop_nmi(void) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index e39504878aec..a7b5b3071072 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -137,6 +137,7 @@ page_table_range_init_count(unsigned long start, unsigned long end) vaddr = start; pgd_idx = pgd_index(vaddr); + pmd_idx = pmd_index(vaddr); for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) { for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index b91ce75bd35d..d2102c4eefbf 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -141,15 +141,26 @@ static ssize_t blk_mq_sysfs_completed_show(struct blk_mq_ctx *ctx, char *page) static ssize_t sysfs_list_show(char *page, struct list_head *list, char *msg) { - char *start_page = page; struct request *rq; + int len = snprintf(page, PAGE_SIZE - 1, "%s:\n", msg); + + list_for_each_entry(rq, list, queuelist) { + const int rq_len = 2 * sizeof(rq) + 2; + + /* if the output will be truncated */ + if (PAGE_SIZE - 1 < len + rq_len) { + /* backspacing if it can't hold '\t...\n' */ + if (PAGE_SIZE - 1 < len + 5) + len -= rq_len; + len += snprintf(page + len, PAGE_SIZE - 1 - len, + "\t...\n"); + break; + } + len += snprintf(page + len, PAGE_SIZE - 1 - len, + "\t%p\n", rq); + } - page += sprintf(page, "%s:\n", msg); - - list_for_each_entry(rq, list, queuelist) - page += sprintf(page, "\t%p\n", rq); - - return page - start_page; + return len; } static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page) diff --git a/drivers/base/node.c b/drivers/base/node.c index bc9f43bf7e29..5f5160c9455f 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -399,6 +399,16 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { int page_nid; + /* + * memory block could have several absent sections from start. + * skip pfn range from absent section + */ + if (!pfn_present(pfn)) { + pfn = round_down(pfn + PAGES_PER_SECTION, + PAGES_PER_SECTION) - 1; + continue; + } + page_nid = get_nid_for_pfn(pfn); if (page_nid < 0) continue; diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 51824d1f23ea..2d3385d3c6a9 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -2464,6 +2464,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto release; + kref_init(&dev->kref); result = nvme_dev_start(dev); if (result) { if (result == -EBUSY) @@ -2471,7 +2472,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto release_pools; } - kref_init(&dev->kref); result = nvme_dev_add(dev); if (result) goto shutdown; diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 79a266934327..fd2820ff9e5b 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -3387,6 +3387,14 @@ void radeon_combios_asic_init(struct drm_device *dev) rdev->pdev->subsystem_device == 0x30ae) return; + /* quirk for rs4xx HP Compaq dc5750 Small Form Factor to make it resume + * - it hangs on resume inside the dynclk 1 table. + */ + if (rdev->family == CHIP_RS480 && + rdev->pdev->subsystem_vendor == 0x103c && + rdev->pdev->subsystem_device == 0x280a) + return; + /* DYN CLK 1 */ table = combios_get_table_offset(dev, COMBIOS_DYN_CLK_1_TABLE); if (table) diff --git a/drivers/iio/gyro/Kconfig b/drivers/iio/gyro/Kconfig index 6f64c5cc5387..ac2d69e34c8c 100644 --- a/drivers/iio/gyro/Kconfig +++ b/drivers/iio/gyro/Kconfig @@ -93,8 +93,7 @@ config IIO_ST_GYRO_SPI_3AXIS config ITG3200 tristate "InvenSense ITG3200 Digital 3-Axis Gyroscope I2C driver" depends on I2C - select IIO_BUFFER - select IIO_TRIGGERED_BUFFER + select IIO_TRIGGERED_BUFFER if IIO_BUFFER help Say yes here to add support for the InvenSense ITG3200 digital 3-axis gyroscope sensor. diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index a283274a5a09..639557bdc0cd 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -85,7 +85,7 @@ */ struct ib_uverbs_device { - struct kref ref; + atomic_t refcount; int num_comp_vectors; struct completion comp; struct device *dev; @@ -94,6 +94,7 @@ struct ib_uverbs_device { struct cdev cdev; struct rb_root xrcd_tree; struct mutex xrcd_tree_mutex; + struct kobject kobj; }; struct ib_uverbs_event_file { diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 2adc14372b94..5e66eab292d8 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2111,6 +2111,12 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, next->send_flags = user_wr->send_flags; if (is_ud) { + if (next->opcode != IB_WR_SEND && + next->opcode != IB_WR_SEND_WITH_IMM) { + ret = -EINVAL; + goto out_put; + } + next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext); if (!next->wr.ud.ah) { @@ -2150,9 +2156,11 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, user_wr->wr.atomic.compare_add; next->wr.atomic.swap = user_wr->wr.atomic.swap; next->wr.atomic.rkey = user_wr->wr.atomic.rkey; + case IB_WR_SEND: break; default: - break; + ret = -EINVAL; + goto out_put; } } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 7a515c867674..8802d5ccd93d 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -127,14 +127,18 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device); -static void ib_uverbs_release_dev(struct kref *ref) +static void ib_uverbs_release_dev(struct kobject *kobj) { struct ib_uverbs_device *dev = - container_of(ref, struct ib_uverbs_device, ref); + container_of(kobj, struct ib_uverbs_device, kobj); - complete(&dev->comp); + kfree(dev); } +static struct kobj_type ib_uverbs_dev_ktype = { + .release = ib_uverbs_release_dev, +}; + static void ib_uverbs_release_event_file(struct kref *ref) { struct ib_uverbs_event_file *file = @@ -298,13 +302,19 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, return context->device->dealloc_ucontext(context); } +static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) +{ + complete(&dev->comp); +} + static void ib_uverbs_release_file(struct kref *ref) { struct ib_uverbs_file *file = container_of(ref, struct ib_uverbs_file, ref); module_put(file->device->ib_dev->owner); - kref_put(&file->device->ref, ib_uverbs_release_dev); + if (atomic_dec_and_test(&file->device->refcount)) + ib_uverbs_comp_dev(file->device); kfree(file); } @@ -734,9 +744,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) int ret; dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev); - if (dev) - kref_get(&dev->ref); - else + if (!atomic_inc_not_zero(&dev->refcount)) return -ENXIO; if (!try_module_get(dev->ib_dev->owner)) { @@ -757,6 +765,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) mutex_init(&file->mutex); filp->private_data = file; + kobject_get(&dev->kobj); return nonseekable_open(inode, filp); @@ -764,13 +773,16 @@ err_module: module_put(dev->ib_dev->owner); err: - kref_put(&dev->ref, ib_uverbs_release_dev); + if (atomic_dec_and_test(&dev->refcount)) + ib_uverbs_comp_dev(dev); + return ret; } static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; + struct ib_uverbs_device *dev = file->device; ib_uverbs_cleanup_ucontext(file, file->ucontext); @@ -778,6 +790,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) kref_put(&file->async_file->ref, ib_uverbs_release_event_file); kref_put(&file->ref, ib_uverbs_release_file); + kobject_put(&dev->kobj); return 0; } @@ -873,10 +886,11 @@ static void ib_uverbs_add_one(struct ib_device *device) if (!uverbs_dev) return; - kref_init(&uverbs_dev->ref); + atomic_set(&uverbs_dev->refcount, 1); init_completion(&uverbs_dev->comp); uverbs_dev->xrcd_tree = RB_ROOT; mutex_init(&uverbs_dev->xrcd_tree_mutex); + kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype); spin_lock(&map_lock); devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); @@ -903,6 +917,7 @@ static void ib_uverbs_add_one(struct ib_device *device) cdev_init(&uverbs_dev->cdev, NULL); uverbs_dev->cdev.owner = THIS_MODULE; uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; + uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj; kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum); if (cdev_add(&uverbs_dev->cdev, base, 1)) goto err_cdev; @@ -933,9 +948,10 @@ err_cdev: clear_bit(devnum, overflow_map); err: - kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); + if (atomic_dec_and_test(&uverbs_dev->refcount)) + ib_uverbs_comp_dev(uverbs_dev); wait_for_completion(&uverbs_dev->comp); - kfree(uverbs_dev); + kobject_put(&uverbs_dev->kobj); return; } @@ -955,9 +971,10 @@ static void ib_uverbs_remove_one(struct ib_device *device) else clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map); - kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); + if (atomic_dec_and_test(&uverbs_dev->refcount)) + ib_uverbs_comp_dev(uverbs_dev); wait_for_completion(&uverbs_dev->comp); - kfree(uverbs_dev); + kobject_put(&uverbs_dev->kobj); } static char *uverbs_devnode(struct device *dev, umode_t *mode) diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 170dca608042..1ddcebd84622 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -147,9 +147,13 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) enum rdma_link_layer ll; memset(ah_attr, 0, sizeof *ah_attr); - ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24; ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num); + if (ll == IB_LINK_LAYER_ETHERNET) + ah_attr->sl = be32_to_cpu(ah->av.eth.sl_tclass_flowlabel) >> 29; + else + ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; + ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0; if (ah->av.ib.stat_rate) ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET; diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index db2ea31df832..b5e50c605933 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -563,6 +563,8 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) struct mlx4_port *p; int i; int ret; + int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port_num) == + IB_LINK_LAYER_ETHERNET; p = kzalloc(sizeof *p, GFP_KERNEL); if (!p) @@ -580,7 +582,8 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) p->pkey_group.name = "pkey_idx"; p->pkey_group.attrs = - alloc_group_attrs(show_port_pkey, store_port_pkey, + alloc_group_attrs(show_port_pkey, + is_eth ? NULL : store_port_pkey, dev->dev->caps.pkey_table_len[port_num]); if (!p->pkey_group.attrs) { ret = -ENOMEM; diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c index 3b9afccaaade..eabe54738be6 100644 --- a/drivers/infiniband/hw/qib/qib_keys.c +++ b/drivers/infiniband/hw/qib/qib_keys.c @@ -86,6 +86,10 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region) * unrestricted LKEY. */ rkt->gen++; + /* + * bits are capped in qib_verbs.c to insure enough bits + * for generation number + */ mr->lkey = (r << (32 - ib_qib_lkey_table_size)) | ((((1 << (24 - ib_qib_lkey_table_size)) - 1) & rkt->gen) << 8); diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 092b0bb1bb78..c141b9b2493d 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "qib.h" #include "qib_common.h" @@ -2086,10 +2087,16 @@ int qib_register_ib_device(struct qib_devdata *dd) * the LKEY). The remaining bits act as a generation number or tag. */ spin_lock_init(&dev->lk_table.lock); + /* insure generation is at least 4 bits see keys.c */ + if (ib_qib_lkey_table_size > MAX_LKEY_TABLE_BITS) { + qib_dev_warn(dd, "lkey bits %u too large, reduced to %u\n", + ib_qib_lkey_table_size, MAX_LKEY_TABLE_BITS); + ib_qib_lkey_table_size = MAX_LKEY_TABLE_BITS; + } dev->lk_table.max = 1 << ib_qib_lkey_table_size; lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); dev->lk_table.table = (struct qib_mregion __rcu **) - __get_free_pages(GFP_KERNEL, get_order(lk_tab_size)); + vmalloc(lk_tab_size); if (dev->lk_table.table == NULL) { ret = -ENOMEM; goto err_lk; @@ -2262,7 +2269,7 @@ err_tx: sizeof(struct qib_pio_header), dev->pio_hdrs, dev->pio_hdrs_phys); err_hdrs: - free_pages((unsigned long) dev->lk_table.table, get_order(lk_tab_size)); + vfree(dev->lk_table.table); err_lk: kfree(dev->qp_table); err_qpt: @@ -2316,8 +2323,7 @@ void qib_unregister_ib_device(struct qib_devdata *dd) sizeof(struct qib_pio_header), dev->pio_hdrs, dev->pio_hdrs_phys); lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); - free_pages((unsigned long) dev->lk_table.table, - get_order(lk_tab_size)); + vfree(dev->lk_table.table); kfree(dev->qp_table); } diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index a01c7d2cf541..d34bc69f1f17 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -647,6 +647,8 @@ struct qib_qpn_table { struct qpn_map map[QPNMAP_ENTRIES]; }; +#define MAX_LKEY_TABLE_BITS 23 + struct qib_lkey_table { spinlock_t lock; /* protect changes in this struct */ u32 next; /* next unused index (speeds search) */ diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index fb787c3e88d9..bb9f073a4e13 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -240,19 +240,14 @@ static int evdev_flush(struct file *file, fl_owner_t id) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; - int retval; - retval = mutex_lock_interruptible(&evdev->mutex); - if (retval) - return retval; + mutex_lock(&evdev->mutex); - if (!evdev->exist || client->revoked) - retval = -ENODEV; - else - retval = input_flush_device(&evdev->handle, file); + if (evdev->exist && !client->revoked) + input_flush_device(&evdev->handle, file); mutex_unlock(&evdev->mutex); - return retval; + return 0; } static void evdev_free(struct device *dev) diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index 8c91fd5eb6fd..3ac9c4194814 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -524,9 +524,18 @@ gigaset_tty_open(struct tty_struct *tty) cs->hw.ser->tty = tty; atomic_set(&cs->hw.ser->refcnt, 1); init_completion(&cs->hw.ser->dead_cmp); - tty->disc_data = cs; + /* Set the amount of data we're willing to receive per call + * from the hardware driver to half of the input buffer size + * to leave some reserve. + * Note: We don't do flow control towards the hardware driver. + * If more data is received than will fit into the input buffer, + * it will be dropped and an error will be logged. This should + * never happen as the device is slow and the buffer size ample. + */ + tty->receive_room = RBUFSIZE/2; + /* OK.. Initialization of the datastructures and the HW is done.. Now * startup system and notify the LL that we are ready to run */ diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index a46124ecafc7..73c9f579b042 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3585,6 +3585,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) /* far_copies must be 1 */ conf->prev.stride = conf->dev_sectors; } + conf->reshape_safe = conf->reshape_progress; spin_lock_init(&conf->device_lock); INIT_LIST_HEAD(&conf->retry_list); @@ -3793,7 +3794,6 @@ static int run(struct mddev *mddev) } conf->offset_diff = min_offset_diff; - conf->reshape_safe = conf->reshape_progress; clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); @@ -4138,6 +4138,7 @@ static int raid10_start_reshape(struct mddev *mddev) conf->reshape_progress = size; } else conf->reshape_progress = 0; + conf->reshape_safe = conf->reshape_progress; spin_unlock_irq(&conf->device_lock); if (mddev->delta_disks && mddev->bitmap) { @@ -4204,6 +4205,7 @@ abort: rdev->new_data_offset = rdev->data_offset; smp_wmb(); conf->reshape_progress = MaxSector; + conf->reshape_safe = MaxSector; mddev->reshape_position = MaxSector; spin_unlock_irq(&conf->device_lock); return ret; @@ -4556,6 +4558,7 @@ static void end_reshape(struct r10conf *conf) md_finish_reshape(conf->mddev); smp_wmb(); conf->reshape_progress = MaxSector; + conf->reshape_safe = MaxSector; spin_unlock_irq(&conf->device_lock); /* read-ahead size must cover two whole stripes, which is diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c index 5807185262fe..ee0015be1421 100644 --- a/drivers/media/platform/omap3isp/isp.c +++ b/drivers/media/platform/omap3isp/isp.c @@ -824,14 +824,14 @@ static int isp_pipeline_link_notify(struct media_link *link, u32 flags, int ret; if (notification == MEDIA_DEV_NOTIFY_POST_LINK_CH && - !(link->flags & MEDIA_LNK_FL_ENABLED)) { + !(flags & MEDIA_LNK_FL_ENABLED)) { /* Powering off entities is assumed to never fail. */ isp_pipeline_pm_power(source, -sink_use); isp_pipeline_pm_power(sink, -source_use); return 0; } - if (notification == MEDIA_DEV_NOTIFY_POST_LINK_CH && + if (notification == MEDIA_DEV_NOTIFY_PRE_LINK_CH && (flags & MEDIA_LNK_FL_ENABLED)) { ret = isp_pipeline_pm_power(source, sink_use); diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index 02e2f38c9c85..bdd3609c7641 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -982,9 +982,6 @@ static int rc_dev_uevent(struct device *device, struct kobj_uevent_env *env) { struct rc_dev *dev = to_rc_dev(device); - if (!dev || !dev->input_dev) - return -ENODEV; - if (dev->rc_map.name) ADD_HOTPLUG_VAR("NAME=%s", dev->rc_map.name); if (dev->driver_name) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 6a881ebe5b02..4a50b5049c51 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -329,8 +329,10 @@ EXPORT_SYMBOL(mmc_start_bkops); */ static void mmc_wait_data_done(struct mmc_request *mrq) { - mrq->host->context_info.is_done_rcv = true; - wake_up_interruptible(&mrq->host->context_info.wait); + struct mmc_context_info *context_info = &mrq->host->context_info; + + context_info->is_done_rcv = true; + wake_up_interruptible(&context_info->wait); } static void mmc_wait_done(struct mmc_request *mrq) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 32b0e7055b1e..0697d8f5f3cf 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -636,6 +636,23 @@ static void bond_set_dev_addr(struct net_device *bond_dev, call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev); } +static struct slave *bond_get_old_active(struct bonding *bond, + struct slave *new_active) +{ + struct slave *slave; + struct list_head *iter; + + bond_for_each_slave(bond, slave, iter) { + if (slave == new_active) + continue; + + if (ether_addr_equal(bond->dev->dev_addr, slave->dev->dev_addr)) + return slave; + } + + return NULL; +} + /* * bond_do_fail_over_mac * @@ -672,6 +689,9 @@ static void bond_do_fail_over_mac(struct bonding *bond, write_unlock_bh(&bond->curr_slave_lock); + if (!old_active) + old_active = bond_get_old_active(bond, new_active); + if (old_active) { memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN); memcpy(saddr.sa_data, old_active->dev->dev_addr, @@ -1825,6 +1845,7 @@ static int bond_release_and_destroy(struct net_device *bond_dev, bond_dev->priv_flags |= IFF_DISABLE_NETPOLL; pr_info("%s: destroying bond %s.\n", bond_dev->name, bond_dev->name); + bond_remove_proc_entry(bond); unregister_netdevice(bond_dev); } return ret; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index bc65dc85a622..91e7286ab1e1 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -10737,7 +10737,7 @@ static ssize_t tg3_show_temp(struct device *dev, tg3_ape_scratchpad_read(tp, &temperature, attr->index, sizeof(temperature)); spin_unlock_bh(&tp->lock); - return sprintf(buf, "%u\n", temperature); + return sprintf(buf, "%u\n", temperature * 1000); } diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 669eeb4eb247..f051783f5882 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -674,6 +674,7 @@ bnad_cq_process(struct bnad *bnad, struct bna_ccb *ccb, int budget) if (!next_cmpl->valid) break; } + packets++; /* TODO: BNA_CQ_EF_LOCAL ? */ if (unlikely(flags & (BNA_CQ_EF_MAC_ERROR | @@ -690,7 +691,6 @@ bnad_cq_process(struct bnad *bnad, struct bna_ccb *ccb, int budget) else bnad_cq_setup_skb_frags(rcb, skb, sop_ci, nvecs, len); - packets++; rcb->rxq->rx_packets++; rcb->rxq->rx_bytes += totlen; ccb->bytes_per_intr += totlen; diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 8992b38578d5..e8a1baa87c95 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -557,7 +557,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN" " to slave: %d, port:%d\n", __func__, i, port); - s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; + s_info = &priv->mfunc.master.vf_oper[i].vport[port].state; if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) mlx4_slave_event(dev, i, eqe); } else { /* IB port */ @@ -583,7 +583,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) for (i = 0; i < dev->num_slaves; i++) { if (i == mlx4_master_func_num(dev)) continue; - s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; + s_info = &priv->mfunc.master.vf_oper[i].vport[port].state; if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) mlx4_slave_event(dev, i, eqe); } diff --git a/drivers/net/ethernet/stmicro/stmmac/descs.h b/drivers/net/ethernet/stmicro/stmmac/descs.h index ad3996038018..799c2929c536 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs.h @@ -158,6 +158,8 @@ struct dma_desc { u32 buffer2_size:13; u32 reserved4:3; } etx; /* -- enhanced -- */ + + u64 all_flags; } des01; unsigned int des2; unsigned int des3; diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 7e6628a91514..59fb7f69841b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -240,6 +240,7 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, int end) { + p->des01.all_flags = 0; p->des01.erx.own = 1; p->des01.erx.buffer1_size = BUF_SIZE_8KiB - 1; @@ -254,7 +255,7 @@ static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, static void enh_desc_init_tx_desc(struct dma_desc *p, int mode, int end) { - p->des01.etx.own = 0; + p->des01.all_flags = 0; if (mode == STMMAC_CHAIN_MODE) ehn_desc_tx_set_on_chain(p, end); else diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index 35ad4f427ae2..48c3456445b2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -123,6 +123,7 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x, static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, int end) { + p->des01.all_flags = 0; p->des01.rx.own = 1; p->des01.rx.buffer1_size = BUF_SIZE_2KiB - 1; @@ -137,7 +138,7 @@ static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, static void ndesc_init_tx_desc(struct dma_desc *p, int mode, int end) { - p->des01.tx.own = 0; + p->des01.all_flags = 0; if (mode == STMMAC_CHAIN_MODE) ndesc_tx_set_on_chain(p, end); else diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 8543e1cfd55e..582e0b3cf5ba 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1142,41 +1142,41 @@ static int alloc_dma_desc_resources(struct stmmac_priv *priv) goto err_tx_skbuff; if (priv->extend_desc) { - priv->dma_erx = dma_alloc_coherent(priv->device, rxsize * - sizeof(struct - dma_extended_desc), - &priv->dma_rx_phy, - GFP_KERNEL); + priv->dma_erx = dma_zalloc_coherent(priv->device, rxsize * + sizeof(struct + dma_extended_desc), + &priv->dma_rx_phy, + GFP_KERNEL); if (!priv->dma_erx) goto err_dma; - priv->dma_etx = dma_alloc_coherent(priv->device, txsize * - sizeof(struct - dma_extended_desc), - &priv->dma_tx_phy, - GFP_KERNEL); + priv->dma_etx = dma_zalloc_coherent(priv->device, txsize * + sizeof(struct + dma_extended_desc), + &priv->dma_tx_phy, + GFP_KERNEL); if (!priv->dma_etx) { dma_free_coherent(priv->device, priv->dma_rx_size * - sizeof(struct dma_extended_desc), - priv->dma_erx, priv->dma_rx_phy); + sizeof(struct dma_extended_desc), + priv->dma_erx, priv->dma_rx_phy); goto err_dma; } } else { - priv->dma_rx = dma_alloc_coherent(priv->device, rxsize * - sizeof(struct dma_desc), - &priv->dma_rx_phy, - GFP_KERNEL); + priv->dma_rx = dma_zalloc_coherent(priv->device, rxsize * + sizeof(struct dma_desc), + &priv->dma_rx_phy, + GFP_KERNEL); if (!priv->dma_rx) goto err_dma; - priv->dma_tx = dma_alloc_coherent(priv->device, txsize * - sizeof(struct dma_desc), - &priv->dma_tx_phy, - GFP_KERNEL); + priv->dma_tx = dma_zalloc_coherent(priv->device, txsize * + sizeof(struct dma_desc), + &priv->dma_tx_phy, + GFP_KERNEL); if (!priv->dma_tx) { dma_free_coherent(priv->device, priv->dma_rx_size * - sizeof(struct dma_desc), - priv->dma_rx, priv->dma_rx_phy); + sizeof(struct dma_desc), + priv->dma_rx, priv->dma_rx_phy); goto err_dma; } } diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index f9e96c427558..23dc1316c1c2 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -778,7 +778,7 @@ int usbnet_stop (struct net_device *net) { struct usbnet *dev = netdev_priv(net); struct driver_info *info = dev->driver_info; - int retval, pm; + int retval, pm, mpn; clear_bit(EVENT_DEV_OPEN, &dev->flags); netif_stop_queue (net); @@ -809,6 +809,8 @@ int usbnet_stop (struct net_device *net) usbnet_purge_paused_rxq(dev); + mpn = !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags); + /* deferred work (task, timer, softirq) must also stop. * can't flush_scheduled_work() until we drop rtnl (later), * else workers could deadlock; so make workers a NOP. @@ -819,8 +821,7 @@ int usbnet_stop (struct net_device *net) if (!pm) usb_autopm_put_interface(dev->intf); - if (info->manage_power && - !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags)) + if (info->manage_power && mpn) info->manage_power(dev, 0); else usb_autopm_put_interface(dev->intf); diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c index 66c92a16da29..a35d1dedeffb 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c @@ -314,6 +314,7 @@ static struct usb_device_id rtl8192c_usb_ids[] = { {RTL_USB_DEVICE(0x07b8, 0x8188, rtl92cu_hal_cfg)}, /*Abocom - Abocom*/ {RTL_USB_DEVICE(0x07b8, 0x8189, rtl92cu_hal_cfg)}, /*Funai - Abocom*/ {RTL_USB_DEVICE(0x0846, 0x9041, rtl92cu_hal_cfg)}, /*NetGear WNA1000M*/ + {RTL_USB_DEVICE(0x0846, 0x9043, rtl92cu_hal_cfg)}, /*NG WNA1000Mv2*/ {RTL_USB_DEVICE(0x0b05, 0x17ba, rtl92cu_hal_cfg)}, /*ASUS-Edimax*/ {RTL_USB_DEVICE(0x0bda, 0x5088, rtl92cu_hal_cfg)}, /*Thinkware-CC&C*/ {RTL_USB_DEVICE(0x0df6, 0x0052, rtl92cu_hal_cfg)}, /*Sitecom - Edimax*/ diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index ff3c98f1ea95..91cc44611062 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -67,7 +67,7 @@ struct gntdev_priv { * Only populated if populate_freeable_maps == 1 */ struct list_head freeable_maps; /* lock protects maps and freeable_maps */ - spinlock_t lock; + struct mutex lock; struct mm_struct *mm; struct mmu_notifier mn; }; @@ -216,9 +216,9 @@ static void gntdev_put_map(struct gntdev_priv *priv, struct grant_map *map) } if (populate_freeable_maps && priv) { - spin_lock(&priv->lock); + mutex_lock(&priv->lock); list_del(&map->next); - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); } if (map->pages && !use_ptemod) @@ -387,9 +387,9 @@ static void gntdev_vma_close(struct vm_area_struct *vma) * not do any unmapping, since that has been done prior to * closing the vma, but it may still iterate the unmap_ops list. */ - spin_lock(&priv->lock); + mutex_lock(&priv->lock); map->vma = NULL; - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); } vma->vm_private_data = NULL; gntdev_put_map(priv, map); @@ -433,14 +433,14 @@ static void mn_invl_range_start(struct mmu_notifier *mn, struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); struct grant_map *map; - spin_lock(&priv->lock); + mutex_lock(&priv->lock); list_for_each_entry(map, &priv->maps, next) { unmap_if_in_range(map, start, end); } list_for_each_entry(map, &priv->freeable_maps, next) { unmap_if_in_range(map, start, end); } - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); } static void mn_invl_page(struct mmu_notifier *mn, @@ -457,7 +457,7 @@ static void mn_release(struct mmu_notifier *mn, struct grant_map *map; int err; - spin_lock(&priv->lock); + mutex_lock(&priv->lock); list_for_each_entry(map, &priv->maps, next) { if (!map->vma) continue; @@ -476,7 +476,7 @@ static void mn_release(struct mmu_notifier *mn, err = unmap_grant_pages(map, /* offset */ 0, map->count); WARN_ON(err); } - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); } static struct mmu_notifier_ops gntdev_mmu_ops = { @@ -498,7 +498,7 @@ static int gntdev_open(struct inode *inode, struct file *flip) INIT_LIST_HEAD(&priv->maps); INIT_LIST_HEAD(&priv->freeable_maps); - spin_lock_init(&priv->lock); + mutex_init(&priv->lock); if (use_ptemod) { priv->mm = get_task_mm(current); @@ -574,10 +574,10 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv, return -EFAULT; } - spin_lock(&priv->lock); + mutex_lock(&priv->lock); gntdev_add_map(priv, map); op.index = map->index << PAGE_SHIFT; - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); if (copy_to_user(u, &op, sizeof(op)) != 0) return -EFAULT; @@ -596,7 +596,7 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv, return -EFAULT; pr_debug("priv %p, del %d+%d\n", priv, (int)op.index, (int)op.count); - spin_lock(&priv->lock); + mutex_lock(&priv->lock); map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); if (map) { list_del(&map->next); @@ -604,7 +604,7 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv, list_add_tail(&map->next, &priv->freeable_maps); err = 0; } - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); if (map) gntdev_put_map(priv, map); return err; @@ -672,7 +672,7 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) out_flags = op.action; out_event = op.event_channel_port; - spin_lock(&priv->lock); + mutex_lock(&priv->lock); list_for_each_entry(map, &priv->maps, next) { uint64_t begin = map->index << PAGE_SHIFT; @@ -700,7 +700,7 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) rc = 0; unlock_out: - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); /* Drop the reference to the event channel we did not save in the map */ if (out_flags & UNMAP_NOTIFY_SEND_EVENT) @@ -750,7 +750,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) pr_debug("map %d+%d at %lx (pgoff %lx)\n", index, count, vma->vm_start, vma->vm_pgoff); - spin_lock(&priv->lock); + mutex_lock(&priv->lock); map = gntdev_find_map_index(priv, index, count); if (!map) goto unlock_out; @@ -785,7 +785,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) map->flags |= GNTMAP_readonly; } - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); if (use_ptemod) { err = apply_to_page_range(vma->vm_mm, vma->vm_start, @@ -813,11 +813,11 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) return 0; unlock_out: - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); return err; out_unlock_put: - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); out_put_map: if (use_ptemod) map->vma = NULL; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a0b65a01fed7..86f86823a5f4 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1710,8 +1710,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, spin_unlock(&root->fs_info->trans_lock); wait_for_commit(root, prev_trans); + ret = prev_trans->aborted; btrfs_put_transaction(prev_trans); + if (ret) + goto cleanup_transaction; } else { spin_unlock(&root->fs_info->trans_lock); } diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index dfc95646b88c..31df9bceedca 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -67,6 +67,12 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, goto out_drop_write; } + if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) { + rc = -EBADF; + cifs_dbg(VFS, "src file seems to be from a different filesystem type\n"); + goto out_fput; + } + if ((!src_file.file->private_data) || (!dst_file->private_data)) { rc = -EBADF; cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); diff --git a/fs/coredump.c b/fs/coredump.c index a93f7e6ea4cf..72f97a56966f 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -498,10 +498,10 @@ void do_coredump(const siginfo_t *siginfo) const struct cred *old_cred; struct cred *cred; int retval = 0; - int flag = 0; int ispipe; struct files_struct *displaced; - bool need_nonrelative = false; + /* require nonrelative corefile path and be extra careful */ + bool need_suid_safe = false; bool core_dumped = false; static atomic_t core_dump_count = ATOMIC_INIT(0); struct coredump_params cprm = { @@ -535,9 +535,8 @@ void do_coredump(const siginfo_t *siginfo) */ if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { /* Setuid core dump mode */ - flag = O_EXCL; /* Stop rewrite attacks */ cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ - need_nonrelative = true; + need_suid_safe = true; } retval = coredump_wait(siginfo->si_signo, &core_state); @@ -618,7 +617,7 @@ void do_coredump(const siginfo_t *siginfo) if (cprm.limit < binfmt->min_coredump) goto fail_unlock; - if (need_nonrelative && cn.corename[0] != '/') { + if (need_suid_safe && cn.corename[0] != '/') { printk(KERN_WARNING "Pid %d(%s) can only dump core "\ "to fully qualified path!\n", task_tgid_vnr(current), current->comm); @@ -626,8 +625,35 @@ void do_coredump(const siginfo_t *siginfo) goto fail_unlock; } + /* + * Unlink the file if it exists unless this is a SUID + * binary - in that case, we're running around with root + * privs and don't want to unlink another user's coredump. + */ + if (!need_suid_safe) { + mm_segment_t old_fs; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + /* + * If it doesn't exist, that's fine. If there's some + * other problem, we'll catch it at the filp_open(). + */ + (void) sys_unlink((const char __user *)cn.corename); + set_fs(old_fs); + } + + /* + * There is a race between unlinking and creating the + * file, but if that causes an EEXIST here, that's + * fine - another process raced with us while creating + * the corefile, and the other process won. To userspace, + * what matters is that at least one of the two processes + * writes its coredump successfully, not which one. + */ cprm.file = filp_open(cn.corename, - O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, + O_CREAT | 2 | O_NOFOLLOW | + O_LARGEFILE | O_EXCL, 0600); if (IS_ERR(cprm.file)) goto fail_unlock; diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index d3fa6bd9503e..221719eac5de 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -288,7 +288,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) page_cache_release(page); goto fail; } - page_cache_release(page); node->page[i] = page; } @@ -398,11 +397,11 @@ node_error: void hfs_bnode_free(struct hfs_bnode *node) { - //int i; + int i; - //for (i = 0; i < node->tree->pages_per_bnode; i++) - // if (node->page[i]) - // page_cache_release(node->page[i]); + for (i = 0; i < node->tree->pages_per_bnode; i++) + if (node->page[i]) + page_cache_release(node->page[i]); kfree(node); } diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c index 9f4ee7f52026..6fc766df0461 100644 --- a/fs/hfs/brec.c +++ b/fs/hfs/brec.c @@ -131,13 +131,16 @@ skip: hfs_bnode_write(node, entry, data_off + key_len, entry_len); hfs_bnode_dump(node); - if (new_node) { - /* update parent key if we inserted a key - * at the start of the first node - */ - if (!rec && new_node != node) - hfs_brec_update_parent(fd); + /* + * update parent key if we inserted a key + * at the start of the node and it is not the new node + */ + if (!rec && new_node != node) { + hfs_bnode_read_key(node, fd->search_key, data_off + size); + hfs_brec_update_parent(fd); + } + if (new_node) { hfs_bnode_put(fd->bnode); if (!new_node->parent) { hfs_btree_inc_height(tree); @@ -166,9 +169,6 @@ skip: goto again; } - if (!rec) - hfs_brec_update_parent(fd); - return 0; } @@ -366,6 +366,8 @@ again: if (IS_ERR(parent)) return PTR_ERR(parent); __hfs_brec_find(parent, fd); + if (fd->record < 0) + return -ENOENT; hfs_bnode_dump(parent); rec = fd->record; diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index 11c860204520..bedfe5f7d332 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -456,7 +456,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) page_cache_release(page); goto fail; } - page_cache_release(page); node->page[i] = page; } @@ -568,13 +567,11 @@ node_error: void hfs_bnode_free(struct hfs_bnode *node) { -#if 0 int i; for (i = 0; i < node->tree->pages_per_bnode; i++) if (node->page[i]) page_cache_release(node->page[i]); -#endif kfree(node); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 58258ad50d5f..7b945957e230 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2275,7 +2275,7 @@ static int _nfs4_do_open(struct inode *dir, goto err_free_label; state = ctx->state; - if ((opendata->o_arg.open_flags & O_EXCL) && + if ((opendata->o_arg.open_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL) && (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) { nfs4_exclusive_attrset(opendata, sattr); @@ -8368,6 +8368,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, .state_renewal_ops = &nfs41_state_renewal_ops, + .mig_recovery_ops = &nfs41_mig_recovery_ops, }; #endif diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 27d7f2742592..11763ce73709 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -60,8 +60,8 @@ EXPORT_SYMBOL_GPL(nfs_pgheader_init); void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) { spin_lock(&hdr->lock); - if (pos < hdr->io_start + hdr->good_bytes) { - set_bit(NFS_IOHDR_ERROR, &hdr->flags); + if (!test_and_set_bit(NFS_IOHDR_ERROR, &hdr->flags) + || pos < hdr->io_start + hdr->good_bytes) { clear_bit(NFS_IOHDR_EOF, &hdr->flags); hdr->good_bytes = pos - hdr->io_start; hdr->error = error; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 287cd5f23421..142d29e3ccdf 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1496,6 +1496,22 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint); } + /* + * Sanity check length of allocation descriptors and extended attrs to + * avoid integer overflows + */ + if (iinfo->i_lenEAttr > inode->i_sb->s_blocksize + || iinfo->i_lenAlloc > inode->i_sb->s_blocksize) { + make_bad_inode(inode); + return; + } + /* Now do exact checks */ + if (udf_file_entry_alloc_offset(inode) + + iinfo->i_lenAlloc > inode->i_sb->s_blocksize) { + make_bad_inode(inode); + return; + } + switch (fe->icbTag.fileType) { case ICBTAG_FILE_TYPE_DIRECTORY: inode->i_op = &udf_dir_inode_operations; diff --git a/include/net/ip.h b/include/net/ip.h index 5128fa7a8302..476bab2d28b0 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -154,6 +154,7 @@ static inline __u8 get_rtconn_flags(struct ipcm_cookie* ipc, struct sock* sk) } /* datagram.c */ +int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); void ip4_datagram_release_cb(struct sock *sk); diff --git a/kernel/fork.c b/kernel/fork.c index e2c685396295..1394fb3476d0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1756,13 +1756,21 @@ static int check_unshare_flags(unsigned long unshare_flags) CLONE_NEWUSER|CLONE_NEWPID)) return -EINVAL; /* - * Not implemented, but pretend it works if there is nothing to - * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND - * needs to unshare vm. + * Not implemented, but pretend it works if there is nothing + * to unshare. Note that unsharing the address space or the + * signal handlers also need to unshare the signal queues (aka + * CLONE_THREAD). */ if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) { - /* FIXME: get_task_mm() increments ->mm_users */ - if (atomic_read(¤t->mm->mm_users) > 1) + if (!thread_group_empty(current)) + return -EINVAL; + } + if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) { + if (atomic_read(¤t->sighand->count) > 1) + return -EINVAL; + } + if (unshare_flags & CLONE_VM) { + if (!current_is_single_threaded()) return -EINVAL; } @@ -1831,16 +1839,16 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) if (unshare_flags & CLONE_NEWUSER) unshare_flags |= CLONE_THREAD | CLONE_FS; /* - * If unsharing a thread from a thread group, must also unshare vm. - */ - if (unshare_flags & CLONE_THREAD) - unshare_flags |= CLONE_VM; - /* * If unsharing vm, must also unshare signal handlers. */ if (unshare_flags & CLONE_VM) unshare_flags |= CLONE_SIGHAND; /* + * If unsharing a signal handlers, must also unshare the signal queues. + */ + if (unshare_flags & CLONE_SIGHAND) + unshare_flags |= CLONE_THREAD; + /* * If unsharing namespace, must also unshare filesystem information. */ if (unshare_flags & CLONE_NEWNS) diff --git a/mm/vmscan.c b/mm/vmscan.c index 88edf53748ee..57af138d26a2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1087,7 +1087,7 @@ cull_mlocked: if (PageSwapCache(page)) try_to_free_swap(page); unlock_page(page); - putback_lru_page(page); + list_add(&page->lru, &ret_pages); continue; activate_locked: diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index b7b1914dfa25..27cf128ebc15 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -347,7 +347,6 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, return -ENOMEM; rcu_assign_pointer(*pp, p); - br_mdb_notify(br->dev, port, group, RTM_NEWMDB); return 0; } @@ -370,6 +369,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, if (!p || p->br != br || p->state == BR_STATE_DISABLED) return -EINVAL; + memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) ip.u.ip4 = entry->addr.u.ip4; @@ -416,6 +416,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (!netif_running(br->dev) || br->multicast_disabled) return -EINVAL; + memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) { if (timer_pending(&br->ip4_querier.timer)) diff --git a/net/core/datagram.c b/net/core/datagram.c index a16ed7bbe376..13bc7dad7990 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -130,6 +130,35 @@ out_noerr: goto out; } +static struct sk_buff *skb_set_peeked(struct sk_buff *skb) +{ + struct sk_buff *nskb; + + if (skb->peeked) + return skb; + + /* We have to unshare an skb before modifying it. */ + if (!skb_shared(skb)) + goto done; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return ERR_PTR(-ENOMEM); + + skb->prev->next = nskb; + skb->next->prev = nskb; + nskb->prev = skb->prev; + nskb->next = skb->next; + + consume_skb(skb); + skb = nskb; + +done: + skb->peeked = 1; + + return skb; +} + /** * __skb_recv_datagram - Receive a datagram skbuff * @sk: socket @@ -164,7 +193,9 @@ out_noerr: struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, int *peeked, int *off, int *err) { + struct sk_buff_head *queue = &sk->sk_receive_queue; struct sk_buff *skb, *last; + unsigned long cpu_flags; long timeo; /* * Caller is allowed not to check sk->sk_err before skb_recv_datagram() @@ -183,8 +214,6 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, * Look at current nfs client by the way... * However, this function was correct in any case. 8) */ - unsigned long cpu_flags; - struct sk_buff_head *queue = &sk->sk_receive_queue; int _off = *off; last = (struct sk_buff *)queue; @@ -198,7 +227,12 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, _off -= skb->len; continue; } - skb->peeked = 1; + + skb = skb_set_peeked(skb); + error = PTR_ERR(skb); + if (IS_ERR(skb)) + goto unlock_err; + atomic_inc(&skb->users); } else __skb_unlink(skb, queue); @@ -222,6 +256,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, return NULL; +unlock_err: + spin_unlock_irqrestore(&queue->lock, cpu_flags); no_packet: *err = error; return NULL; @@ -742,7 +778,8 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) if (likely(!sum)) { if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (!skb_shared(skb)) + skb->ip_summed = CHECKSUM_UNNECESSARY; } return sum; } diff --git a/net/core/dev.c b/net/core/dev.c index 1b9e700c85b6..ae3260fc133d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3214,6 +3214,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, local_irq_save(flags); rps_lock(sd); + if (!netif_running(skb->dev)) + goto drop; qlen = skb_queue_len(&sd->input_pkt_queue); if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { if (skb_queue_len(&sd->input_pkt_queue)) { @@ -3235,6 +3237,7 @@ enqueue: goto enqueue; } +drop: sd->dropped++; rps_unlock(sd); @@ -3551,8 +3554,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) pt_prev = NULL; - rcu_read_lock(); - another_round: skb->skb_iif = skb->dev->ifindex; @@ -3562,7 +3563,7 @@ another_round: skb->protocol == cpu_to_be16(ETH_P_8021AD)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) - goto unlock; + goto out; } #ifdef CONFIG_NET_CLS_ACT @@ -3587,7 +3588,7 @@ skip_taps: #ifdef CONFIG_NET_CLS_ACT skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) - goto unlock; + goto out; ncls: #endif @@ -3602,7 +3603,7 @@ ncls: if (vlan_do_receive(&skb)) goto another_round; else if (unlikely(!skb)) - goto unlock; + goto out; } rx_handler = rcu_dereference(skb->dev->rx_handler); @@ -3614,7 +3615,7 @@ ncls: switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: ret = NET_RX_SUCCESS; - goto unlock; + goto out; case RX_HANDLER_ANOTHER: goto another_round; case RX_HANDLER_EXACT: @@ -3666,8 +3667,6 @@ drop: ret = NET_RX_DROP; } -unlock: - rcu_read_unlock(); out: return ret; } @@ -3699,29 +3698,30 @@ static int __netif_receive_skb(struct sk_buff *skb) static int netif_receive_skb_internal(struct sk_buff *skb) { + int ret; + net_timestamp_check(netdev_tstamp_prequeue, skb); if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; + rcu_read_lock(); + #ifdef CONFIG_RPS if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; - int cpu, ret; - - rcu_read_lock(); - - cpu = get_rps_cpu(skb->dev, skb, &rflow); + int cpu = get_rps_cpu(skb->dev, skb, &rflow); if (cpu >= 0) { ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); return ret; } - rcu_read_unlock(); } #endif - return __netif_receive_skb(skb); + ret = __netif_receive_skb(skb); + rcu_read_unlock(); + return ret; } /** @@ -4182,8 +4182,10 @@ static int process_backlog(struct napi_struct *napi, int quota) unsigned int qlen; while ((skb = __skb_dequeue(&sd->process_queue))) { + rcu_read_lock(); local_irq_enable(); __netif_receive_skb(skb); + rcu_read_unlock(); local_irq_disable(); input_queue_head_incr(sd); if (++work >= quota) { @@ -5694,6 +5696,7 @@ static void rollback_registered_many(struct list_head *head) unlist_netdevice(dev); dev->reg_state = NETREG_UNREGISTERING; + on_each_cpu(flush_backlog, dev, 1); } synchronize_net(); @@ -5951,7 +5954,8 @@ static int netif_alloc_netdev_queues(struct net_device *dev) struct netdev_queue *tx; size_t sz = count * sizeof(*tx); - BUG_ON(count < 1 || count > 0xffff); + if (count < 1 || count > 0xffff) + return -EINVAL; tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); if (!tx) { @@ -6309,8 +6313,6 @@ void netdev_run_todo(void) dev->reg_state = NETREG_UNREGISTERED; - on_each_cpu(flush_backlog, dev, 1); - netdev_wait_allrefs(dev); /* paranoia */ diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 185c341fafbd..aeedc3a961a1 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -621,15 +621,17 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, { int idx = 0; struct fib_rule *rule; + int err = 0; rcu_read_lock(); list_for_each_entry_rcu(rule, &ops->rules_list, list) { if (idx < cb->args[1]) goto skip; - if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWRULE, - NLM_F_MULTI, ops) < 0) + err = fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWRULE, + NLM_F_MULTI, ops); + if (err) break; skip: idx++; @@ -638,7 +640,7 @@ skip: cb->args[1] = idx; rules_ops_put(ops); - return skb->len; + return err; } static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) @@ -654,7 +656,9 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) if (ops == NULL) return -EAFNOSUPPORT; - return dump_rules(skb, cb, ops); + dump_rules(skb, cb, ops); + + return skb->len; } rcu_read_lock(); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index ca68d32b49ba..dbd797a62b3a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3464,8 +3464,10 @@ static int pktgen_thread_worker(void *arg) pktgen_rem_thread(t); /* Wait for kthread_stop */ - while (!kthread_should_stop()) { + for (;;) { set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; schedule(); } __set_current_state(TASK_RUNNING); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8aadd6a072a4..465092f8fb76 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1259,10 +1259,6 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_SLAVE_DATA] = { .type = NLA_NESTED }, }; -static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { - [IFLA_VF_INFO] = { .type = NLA_NESTED }, -}; - static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) }, [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, @@ -1336,67 +1332,66 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return 0; } -static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) +static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) { - int rem, err = -EINVAL; - struct nlattr *vf; const struct net_device_ops *ops = dev->netdev_ops; + int err = -EINVAL; - nla_for_each_nested(vf, attr, rem) { - switch (nla_type(vf)) { - case IFLA_VF_MAC: { - struct ifla_vf_mac *ivm; - ivm = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_mac) - err = ops->ndo_set_vf_mac(dev, ivm->vf, - ivm->mac); - break; - } - case IFLA_VF_VLAN: { - struct ifla_vf_vlan *ivv; - ivv = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_vlan) - err = ops->ndo_set_vf_vlan(dev, ivv->vf, - ivv->vlan, - ivv->qos); - break; - } - case IFLA_VF_TX_RATE: { - struct ifla_vf_tx_rate *ivt; - ivt = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_tx_rate) - err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, - ivt->rate); - break; - } - case IFLA_VF_SPOOFCHK: { - struct ifla_vf_spoofchk *ivs; - ivs = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_spoofchk) - err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, - ivs->setting); - break; - } - case IFLA_VF_LINK_STATE: { - struct ifla_vf_link_state *ivl; - ivl = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_link_state) - err = ops->ndo_set_vf_link_state(dev, ivl->vf, - ivl->link_state); - break; - } - default: - err = -EINVAL; - break; - } - if (err) - break; + if (tb[IFLA_VF_MAC]) { + struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_mac) + err = ops->ndo_set_vf_mac(dev, ivm->vf, + ivm->mac); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_VLAN]) { + struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_vlan) + err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, + ivv->qos); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_TX_RATE]) { + struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_tx_rate) + err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, + ivt->rate); + if (err < 0) + return err; } + + if (tb[IFLA_VF_SPOOFCHK]) { + struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_spoofchk) + err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, + ivs->setting); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_LINK_STATE]) { + struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_link_state) + err = ops->ndo_set_vf_link_state(dev, ivl->vf, + ivl->link_state); + if (err < 0) + return err; + } + return err; } @@ -1579,14 +1574,21 @@ static int do_setlink(const struct sk_buff *skb, } if (tb[IFLA_VFINFO_LIST]) { + struct nlattr *vfinfo[IFLA_VF_MAX + 1]; struct nlattr *attr; int rem; + nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { - if (nla_type(attr) != IFLA_VF_INFO) { + if (nla_type(attr) != IFLA_VF_INFO || + nla_len(attr) < NLA_HDRLEN) { err = -EINVAL; goto errout; } - err = do_setvfinfo(dev, attr); + err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr, + ifla_vf_policy); + if (err < 0) + goto errout; + err = do_setvfinfo(dev, vfinfo); if (err < 0) goto errout; modified = 1; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 951fe55b1671..f4c804dbd3b4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1291,7 +1291,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, encap = SKB_GSO_CB(skb)->encap_level > 0; if (encap) - features = skb->dev->hw_enc_features & netif_skb_features(skb); + features &= skb->dev->hw_enc_features; SKB_GSO_CB(skb)->encap_level += ihl; skb_reset_transport_header(skb); diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index a3095fdefbed..f0c307cb6196 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -20,7 +20,7 @@ #include #include -int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; @@ -39,8 +39,6 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_dst_reset(sk); - lock_sock(sk); - oif = sk->sk_bound_dev_if; saddr = inet->inet_saddr; if (ipv4_is_multicast(usin->sin_addr.s_addr)) { @@ -81,9 +79,19 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_dst_set(sk, &rt->dst); err = 0; out: - release_sock(sk); return err; } +EXPORT_SYMBOL(__ip4_datagram_connect); + +int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + int res; + + lock_sock(sk); + res = __ip4_datagram_connect(sk, uaddr, addr_len); + release_sock(sk); + return res; +} EXPORT_SYMBOL(ip4_datagram_connect); /* Because UDP xmit path can manipulate sk_dst_cache without holding diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 278836f1a5ad..0da513e7730a 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -69,7 +69,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, skb->mac_len = skb_inner_network_offset(skb); /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + enc_features = skb->dev->hw_enc_features & features; segs = skb_mac_gso_segment(skb, enc_features); if (!segs || IS_ERR(segs)) { skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 9ff497d17545..3f3a424ef0d5 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -357,7 +357,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) ihl = ip_hdrlen(skb); /* Determine the position of this fragment. */ - end = offset + skb->len - ihl; + end = offset + skb->len - skb_network_offset(skb) - ihl; err = -EINVAL; /* Is this the final fragment? */ @@ -387,7 +387,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) goto err; err = -ENOMEM; - if (pskb_pull(skb, ihl) == NULL) + if (!pskb_pull(skb, skb_network_offset(skb) + ihl)) goto err; err = pskb_trim_rcsum(skb, end - offset); @@ -628,6 +628,9 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0; iph->tot_len = htons(len); iph->tos |= ecn; + + ip_send_check(iph); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); qp->q.fragments = NULL; qp->q.fragments_tail = NULL; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 0a4af0920af3..2d4be69c64f4 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -484,7 +484,8 @@ drop: EXPORT_SYMBOL_GPL(ip_tunnel_rcv); static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, - struct rtable *rt, __be16 df) + struct rtable *rt, __be16 df, + const struct iphdr *inner_iph) { struct ip_tunnel *tunnel = netdev_priv(dev); int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; @@ -501,7 +502,8 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, if (skb->protocol == htons(ETH_P_IP)) { if (!skb_is_gso(skb) && - (df & htons(IP_DF)) && mtu < pkt_size) { + (inner_iph->frag_off & htons(IP_DF)) && + mtu < pkt_size) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); return -E2BIG; @@ -632,7 +634,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { + if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) { ip_rt_put(rt); goto tx_error; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 21a3a9e90b10..6970e36ad7b8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1957,12 +1957,19 @@ void udp_v4_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; - dst = sk->sk_rx_dst; + dst = ACCESS_ONCE(sk->sk_rx_dst); if (dst) dst = dst_check(dst, 0); - if (dst) - skb_dst_set_noref(skb, dst); + if (dst) { + /* DST_NOCACHE can not be used without taking a reference */ + if (dst->flags & DST_NOCACHE) { + if (likely(atomic_inc_not_zero(&dst->__refcnt))) + skb_dst_set(skb, dst); + } else { + skb_dst_set_noref(skb, dst); + } + } } int udp_rcv(struct sk_buff *skb) @@ -2510,7 +2517,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, skb->protocol = htons(ETH_P_TEB); /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + enc_features = skb->dev->hw_enc_features & features; segs = skb_mac_gso_segment(skb, enc_features); if (!segs || IS_ERR(segs)) { skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 841cfa2c4600..6b89b2a1ac74 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -40,7 +40,7 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a) return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0); } -int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); @@ -56,7 +56,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (usin->sin6_family == AF_INET) { if (__ipv6_only_sock(sk)) return -EAFNOSUPPORT; - err = ip4_datagram_connect(sk, uaddr, addr_len); + err = __ip4_datagram_connect(sk, uaddr, addr_len); goto ipv4_connected; } @@ -98,9 +98,9 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sin.sin_addr.s_addr = daddr->s6_addr32[3]; sin.sin_port = usin->sin6_port; - err = ip4_datagram_connect(sk, - (struct sockaddr *) &sin, - sizeof(sin)); + err = __ip4_datagram_connect(sk, + (struct sockaddr *) &sin, + sizeof(sin)); ipv4_connected: if (err) @@ -203,6 +203,16 @@ out: fl6_sock_release(flowlabel); return err; } + +int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + int res; + + lock_sock(sk); + res = __ip6_datagram_connect(sk, uaddr, addr_len); + release_sock(sk); + return res; +} EXPORT_SYMBOL_GPL(ip6_datagram_connect); int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr, diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c index 447a7fbd1bb6..f5e2ba1c18bf 100644 --- a/net/ipv6/exthdrs_offload.c +++ b/net/ipv6/exthdrs_offload.c @@ -36,6 +36,6 @@ out: return ret; out_rt: - inet_del_offload(&rthdr_offload, IPPROTO_ROUTING); + inet6_del_offload(&rthdr_offload, IPPROTO_ROUTING); goto out; } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4a230b18dfe3..baffa3b7a328 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -357,6 +357,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); ip6gre_tunnel_unlink(ign, netdev_priv(dev)); + ip6_tnl_dst_reset(netdev_priv(dev)); dev_put(dev); } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 51d54dc376f3..05c94d9c3776 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -329,10 +329,10 @@ int ip6_mc_input(struct sk_buff *skb) if (offset < 0) goto out; - if (!ipv6_is_mld(skb, nexthdr, offset)) - goto out; + if (ipv6_is_mld(skb, nexthdr, offset)) + deliver = true; - deliver = true; + goto out; } /* unknown RA - process it normally */ } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index b2f091566f88..dc46ebae1987 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -112,7 +112,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, encap = SKB_GSO_CB(skb)->encap_level > 0; if (encap) - features = skb->dev->hw_enc_features & netif_skb_features(skb); + features &= skb->dev->hw_enc_features; SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h); ipv6h = ipv6_hdr(skb); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8737400af0a0..821d8dfb2ddd 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -552,7 +552,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) if (it->cache == &mrt->mfc6_unres_queue) spin_unlock_bh(&mfc_unres_lock); - else if (it->cache == mrt->mfc6_cache_array) + else if (it->cache == &mrt->mfc6_cache_array[it->ct]) read_unlock(&mrt_lock); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index dca076f6252c..bc08a9ce3bd4 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -297,9 +297,6 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) if (tx->sdata->vif.type == NL80211_IFTYPE_WDS) return TX_CONTINUE; - if (tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT) - return TX_CONTINUE; - if (tx->flags & IEEE80211_TX_PS_BUFFERED) return TX_CONTINUE; diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 851cd880b0c0..0c970cbe0405 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -47,7 +47,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, __skb_push(skb, skb->mac_len); /* Segment inner packet. */ - mpls_features = skb->dev->mpls_features & netif_skb_features(skb); + mpls_features = skb->dev->mpls_features & features; segs = skb_mac_gso_segment(skb, mpls_features); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a0b0ea949192..fd9373c9f057 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -115,6 +115,24 @@ static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask]; } +static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, + gfp_t gfp_mask) +{ + unsigned int len = skb_end_offset(skb); + struct sk_buff *new; + + new = alloc_skb(len, gfp_mask); + if (new == NULL) + return NULL; + + NETLINK_CB(new).portid = NETLINK_CB(skb).portid; + NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group; + NETLINK_CB(new).creds = NETLINK_CB(skb).creds; + + memcpy(skb_put(new, len), skb->data, len); + return new; +} + int netlink_add_tap(struct netlink_tap *nt) { if (unlikely(nt->dev->type != ARPHRD_NETLINK)) @@ -199,7 +217,11 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, int ret = -ENOMEM; dev_hold(dev); - nskb = skb_clone(skb, GFP_ATOMIC); + + if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head)) + nskb = netlink_to_full_skb(skb, GFP_ATOMIC); + else + nskb = skb_clone(skb, GFP_ATOMIC); if (nskb) { nskb->dev = dev; nskb->protocol = htons((u16) sk->sk_protocol); @@ -271,11 +293,6 @@ static void netlink_rcv_wake(struct sock *sk) } #ifdef CONFIG_NETLINK_MMAP -static bool netlink_skb_is_mmaped(const struct sk_buff *skb) -{ - return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; -} - static bool netlink_rx_is_mmaped(struct sock *sk) { return nlk_sk(sk)->rx_ring.pg_vec != NULL; @@ -350,25 +367,52 @@ err1: return NULL; } + +static void +__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec, + unsigned int order) +{ + struct netlink_sock *nlk = nlk_sk(sk); + struct sk_buff_head *queue; + struct netlink_ring *ring; + + queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; + ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; + + spin_lock_bh(&queue->lock); + + ring->frame_max = req->nm_frame_nr - 1; + ring->head = 0; + ring->frame_size = req->nm_frame_size; + ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; + + swap(ring->pg_vec_len, req->nm_block_nr); + swap(ring->pg_vec_order, order); + swap(ring->pg_vec, pg_vec); + + __skb_queue_purge(queue); + spin_unlock_bh(&queue->lock); + + WARN_ON(atomic_read(&nlk->mapped)); + + if (pg_vec) + free_pg_vec(pg_vec, order, req->nm_block_nr); +} + static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, - bool closing, bool tx_ring) + bool tx_ring) { struct netlink_sock *nlk = nlk_sk(sk); struct netlink_ring *ring; - struct sk_buff_head *queue; void **pg_vec = NULL; unsigned int order = 0; - int err; ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; - queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; - if (!closing) { - if (atomic_read(&nlk->mapped)) - return -EBUSY; - if (atomic_read(&ring->pending)) - return -EBUSY; - } + if (atomic_read(&nlk->mapped)) + return -EBUSY; + if (atomic_read(&ring->pending)) + return -EBUSY; if (req->nm_block_nr) { if (ring->pg_vec != NULL) @@ -400,31 +444,19 @@ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, return -EINVAL; } - err = -EBUSY; mutex_lock(&nlk->pg_vec_lock); - if (closing || atomic_read(&nlk->mapped) == 0) { - err = 0; - spin_lock_bh(&queue->lock); - - ring->frame_max = req->nm_frame_nr - 1; - ring->head = 0; - ring->frame_size = req->nm_frame_size; - ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; - - swap(ring->pg_vec_len, req->nm_block_nr); - swap(ring->pg_vec_order, order); - swap(ring->pg_vec, pg_vec); - - __skb_queue_purge(queue); - spin_unlock_bh(&queue->lock); - - WARN_ON(atomic_read(&nlk->mapped)); + if (atomic_read(&nlk->mapped) == 0) { + __netlink_set_ring(sk, req, tx_ring, pg_vec, order); + mutex_unlock(&nlk->pg_vec_lock); + return 0; } + mutex_unlock(&nlk->pg_vec_lock); if (pg_vec) free_pg_vec(pg_vec, order, req->nm_block_nr); - return err; + + return -EBUSY; } static void netlink_mm_open(struct vm_area_struct *vma) @@ -812,7 +844,6 @@ static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) } #else /* CONFIG_NETLINK_MMAP */ -#define netlink_skb_is_mmaped(skb) false #define netlink_rx_is_mmaped(sk) false #define netlink_tx_is_mmaped(sk) false #define netlink_mmap sock_no_mmap @@ -893,10 +924,10 @@ static void netlink_sock_destruct(struct sock *sk) memset(&req, 0, sizeof(req)); if (nlk->rx_ring.pg_vec) - netlink_set_ring(sk, &req, true, false); + __netlink_set_ring(sk, &req, false, NULL, 0); memset(&req, 0, sizeof(req)); if (nlk->tx_ring.pg_vec) - netlink_set_ring(sk, &req, true, true); + __netlink_set_ring(sk, &req, true, NULL, 0); } #endif /* CONFIG_NETLINK_MMAP */ @@ -2190,7 +2221,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, return -EINVAL; if (copy_from_user(&req, optval, sizeof(req))) return -EFAULT; - err = netlink_set_ring(sk, &req, false, + err = netlink_set_ring(sk, &req, optname == NETLINK_TX_RING); break; } diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index acbd774eeb7c..dcc89c74b514 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -65,6 +65,15 @@ struct nl_portid_hash { u32 rnd; }; +static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb) +{ +#ifdef CONFIG_NETLINK_MMAP + return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; +#else + return false; +#endif /* CONFIG_NETLINK_MMAP */ +} + struct netlink_table { struct nl_portid_hash hash; struct hlist_head mc_list; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 270b77dfac30..8bb1a5a9b02e 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -803,7 +803,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(acts)) goto error; - ovs_flow_mask_key(&masked_key, &key, &mask); + ovs_flow_mask_key(&masked_key, &key, true, &mask); error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &masked_key, 0, &acts); if (error) { diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 3c268b3d71c3..4877d5a212a2 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -55,18 +55,21 @@ static u16 range_n_bytes(const struct sw_flow_key_range *range) } void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, - const struct sw_flow_mask *mask) + bool full, const struct sw_flow_mask *mask) { - const long *m = (long *)((u8 *)&mask->key + mask->range.start); - const long *s = (long *)((u8 *)src + mask->range.start); - long *d = (long *)((u8 *)dst + mask->range.start); + int start = full ? 0 : mask->range.start; + int len = full ? sizeof *dst : range_n_bytes(&mask->range); + const long *m = (const long *)((const u8 *)&mask->key + start); + const long *s = (const long *)((const u8 *)src + start); + long *d = (long *)((u8 *)dst + start); int i; - /* The memory outside of the 'mask->range' are not set since - * further operations on 'dst' only uses contents within - * 'mask->range'. + /* If 'full' is true then all of 'dst' is fully initialized. Otherwise, + * if 'full' is false the memory outside of the 'mask->range' is left + * uninitialized. This can be used as an optimization when further + * operations on 'dst' only use contents within 'mask->range'. */ - for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) + for (i = 0; i < len; i += sizeof(long)) *d++ = *s++ & *m++; } @@ -436,7 +439,7 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, u32 hash; struct sw_flow_key masked_key; - ovs_flow_mask_key(&masked_key, unmasked, mask); + ovs_flow_mask_key(&masked_key, unmasked, false, mask); hash = flow_hash(&masked_key, key_start, key_end); head = find_bucket(ti, hash); hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) { diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index baaeb101924d..82e64a9b6416 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -79,5 +79,5 @@ bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, struct sw_flow_match *match); void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, - const struct sw_flow_mask *mask); + bool full, const struct sw_flow_mask *mask); #endif /* flow_table.h */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 84a60b82e235..fee7dcc28abd 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2645,7 +2645,7 @@ static int packet_release(struct socket *sock) static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) { struct packet_sock *po = pkt_sk(sk); - const struct net_device *dev_curr; + struct net_device *dev_curr; __be16 proto_curr; bool need_rehook; @@ -2669,15 +2669,13 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) po->num = proto; po->prot_hook.type = proto; - - if (po->prot_hook.dev) - dev_put(po->prot_hook.dev); - po->prot_hook.dev = dev; po->ifindex = dev ? dev->ifindex : 0; packet_cached_dev_assign(po, dev); } + if (dev_curr) + dev_put(dev_curr); if (proto == 0 || !need_rehook) goto out_unlock; diff --git a/net/rds/info.c b/net/rds/info.c index 9a6b4f66187c..140a44a5f7b7 100644 --- a/net/rds/info.c +++ b/net/rds/info.c @@ -176,7 +176,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, /* check for all kinds of wrapping and the like */ start = (unsigned long)optval; - if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) { + if (len < 0 || len > INT_MAX - PAGE_SIZE + 1 || start + len < start) { ret = -EINVAL; goto out; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index a62a215dd22e..1237a237495b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1167,7 +1167,7 @@ static void sctp_v4_del_protocol(void) unregister_inetaddr_notifier(&sctp_inetaddr_notifier); } -static int __net_init sctp_net_init(struct net *net) +static int __net_init sctp_defaults_init(struct net *net) { int status; @@ -1260,12 +1260,6 @@ static int __net_init sctp_net_init(struct net *net) sctp_dbg_objcnt_init(net); - /* Initialize the control inode/socket for handling OOTB packets. */ - if ((status = sctp_ctl_sock_init(net))) { - pr_err("Failed to initialize the SCTP control sock\n"); - goto err_ctl_sock_init; - } - /* Initialize the local address list. */ INIT_LIST_HEAD(&net->sctp.local_addr_list); spin_lock_init(&net->sctp.local_addr_lock); @@ -1281,9 +1275,6 @@ static int __net_init sctp_net_init(struct net *net) return 0; -err_ctl_sock_init: - sctp_dbg_objcnt_exit(net); - sctp_proc_exit(net); err_init_proc: cleanup_sctp_mibs(net); err_init_mibs: @@ -1292,15 +1283,12 @@ err_sysctl_register: return status; } -static void __net_exit sctp_net_exit(struct net *net) +static void __net_exit sctp_defaults_exit(struct net *net) { /* Free the local address list */ sctp_free_addr_wq(net); sctp_free_local_addr_list(net); - /* Free the control endpoint. */ - inet_ctl_sock_destroy(net->sctp.ctl_sock); - sctp_dbg_objcnt_exit(net); sctp_proc_exit(net); @@ -1308,9 +1296,32 @@ static void __net_exit sctp_net_exit(struct net *net) sctp_sysctl_net_unregister(net); } -static struct pernet_operations sctp_net_ops = { - .init = sctp_net_init, - .exit = sctp_net_exit, +static struct pernet_operations sctp_defaults_ops = { + .init = sctp_defaults_init, + .exit = sctp_defaults_exit, +}; + +static int __net_init sctp_ctrlsock_init(struct net *net) +{ + int status; + + /* Initialize the control inode/socket for handling OOTB packets. */ + status = sctp_ctl_sock_init(net); + if (status) + pr_err("Failed to initialize the SCTP control sock\n"); + + return status; +} + +static void __net_init sctp_ctrlsock_exit(struct net *net) +{ + /* Free the control endpoint. */ + inet_ctl_sock_destroy(net->sctp.ctl_sock); +} + +static struct pernet_operations sctp_ctrlsock_ops = { + .init = sctp_ctrlsock_init, + .exit = sctp_ctrlsock_exit, }; /* Initialize the universe into something sensible. */ @@ -1444,8 +1455,11 @@ static __init int sctp_init(void) sctp_v4_pf_init(); sctp_v6_pf_init(); - status = sctp_v4_protosw_init(); + status = register_pernet_subsys(&sctp_defaults_ops); + if (status) + goto err_register_defaults; + status = sctp_v4_protosw_init(); if (status) goto err_protosw_init; @@ -1453,9 +1467,9 @@ static __init int sctp_init(void) if (status) goto err_v6_protosw_init; - status = register_pernet_subsys(&sctp_net_ops); + status = register_pernet_subsys(&sctp_ctrlsock_ops); if (status) - goto err_register_pernet_subsys; + goto err_register_ctrlsock; status = sctp_v4_add_protocol(); if (status) @@ -1471,12 +1485,14 @@ out: err_v6_add_protocol: sctp_v4_del_protocol(); err_add_protocol: - unregister_pernet_subsys(&sctp_net_ops); -err_register_pernet_subsys: + unregister_pernet_subsys(&sctp_ctrlsock_ops); +err_register_ctrlsock: sctp_v6_protosw_exit(); err_v6_protosw_init: sctp_v4_protosw_exit(); err_protosw_init: + unregister_pernet_subsys(&sctp_defaults_ops); +err_register_defaults: sctp_v4_pf_exit(); sctp_v6_pf_exit(); sctp_sysctl_unregister(); @@ -1509,12 +1525,14 @@ static __exit void sctp_exit(void) sctp_v6_del_protocol(); sctp_v4_del_protocol(); - unregister_pernet_subsys(&sctp_net_ops); + unregister_pernet_subsys(&sctp_ctrlsock_ops); /* Free protosw registrations */ sctp_v6_protosw_exit(); sctp_v4_protosw_exit(); + unregister_pernet_subsys(&sctp_defaults_ops); + /* Unregister with socket layer. */ sctp_v6_pf_exit(); sctp_v4_pf_exit(); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 0ed0eaa62f29..830e40b329d6 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1681,6 +1681,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); if (res) goto exit; + security_sk_clone(sock->sk, new_sock->sk); new_sk = new_sock->sk; new_tsock = tipc_sk(new_sk); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 907371d87312..08e29311e17d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1193,7 +1193,7 @@ static const struct hda_fixup alc880_fixups[] = { /* override all pins as BIOS on old Amilo is broken */ .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { - { 0x14, 0x0121411f }, /* HP */ + { 0x14, 0x0121401f }, /* HP */ { 0x15, 0x99030120 }, /* speaker */ { 0x16, 0x99030130 }, /* bass speaker */ { 0x17, 0x411111f0 }, /* N/A */ @@ -1213,7 +1213,7 @@ static const struct hda_fixup alc880_fixups[] = { /* almost compatible with FUJITSU, but no bass and SPDIF */ .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { - { 0x14, 0x0121411f }, /* HP */ + { 0x14, 0x0121401f }, /* HP */ { 0x15, 0x99030120 }, /* speaker */ { 0x16, 0x411111f0 }, /* N/A */ { 0x17, 0x411111f0 }, /* N/A */ @@ -1421,7 +1421,7 @@ static const struct snd_pci_quirk alc880_fixup_tbl[] = { SND_PCI_QUIRK(0x161f, 0x203d, "W810", ALC880_FIXUP_W810), SND_PCI_QUIRK(0x161f, 0x205d, "Medion Rim 2150", ALC880_FIXUP_MEDION_RIM), SND_PCI_QUIRK(0x1631, 0xe011, "PB 13201056", ALC880_FIXUP_6ST_AUTOMUTE), - SND_PCI_QUIRK(0x1734, 0x107c, "FSC F1734", ALC880_FIXUP_F1734), + SND_PCI_QUIRK(0x1734, 0x107c, "FSC Amilo M1437", ALC880_FIXUP_FUJITSU), SND_PCI_QUIRK(0x1734, 0x1094, "FSC Amilo M1451G", ALC880_FIXUP_FUJITSU), SND_PCI_QUIRK(0x1734, 0x10ac, "FSC AMILO Xi 1526", ALC880_FIXUP_F1734), SND_PCI_QUIRK(0x1734, 0x10b0, "FSC Amilo Pi1556", ALC880_FIXUP_FUJITSU),