diff --git a/Makefile b/Makefile index f5a51cd7ca49..55500e023f61 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 103 +SUBLEVEL = 104 EXTRAVERSION = NAME = Blurry Fish Butt diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts index 5b0430041ec6..fec92cd36ae3 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts @@ -88,7 +88,7 @@ interrupts-extended = <&intc 83 &omap3_pmx_core 0x11a>; pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins &mmc1_cd>; - cd-gpios = <&gpio4 31 IRQ_TYPE_LEVEL_LOW>; /* gpio127 */ + cd-gpios = <&gpio4 31 GPIO_ACTIVE_LOW>; /* gpio127 */ vmmc-supply = <&vmmc1>; bus-width = <4>; cap-power-off-card; diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 0010c78c4998..8fd9e637629a 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -3,6 +3,7 @@ #include #include +#include /* * We map the EFI regions needed for runtime services non-contiguously, @@ -64,6 +65,17 @@ extern u64 asmlinkage efi_call(void *fp, ...); #define efi_call_phys(f, args...) efi_call((f), args) +/* + * Scratch space used for switching the pagetable in the EFI stub + */ +struct efi_scratch { + u64 r15; + u64 prev_cr3; + pgd_t *efi_pgt; + bool use_pgd; + u64 phys_stack; +} __packed; + #define efi_call_virt(f, ...) \ ({ \ efi_status_t __s; \ @@ -71,7 +83,20 @@ extern u64 asmlinkage efi_call(void *fp, ...); efi_sync_low_kernel_mappings(); \ preempt_disable(); \ __kernel_fpu_begin(); \ + \ + if (efi_scratch.use_pgd) { \ + efi_scratch.prev_cr3 = read_cr3(); \ + write_cr3((unsigned long)efi_scratch.efi_pgt); \ + __flush_tlb_all(); \ + } \ + \ __s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__); \ + \ + if (efi_scratch.use_pgd) { \ + write_cr3(efi_scratch.prev_cr3); \ + __flush_tlb_all(); \ + } \ + \ __kernel_fpu_end(); \ preempt_enable(); \ __s; \ @@ -111,6 +136,7 @@ extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); +extern int __init efi_alloc_page_tables(void); extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init old_map_region(efi_memory_desc_t *md); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 4e1b254c3695..4b1152e57340 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1696,6 +1696,8 @@ static int ud_interception(struct vcpu_svm *svm) int er; er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); + if (er == EMULATE_USER_EXIT) + return 0; if (er != EMULATE_DONE) kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 67ba0d8f87c7..253a8c8207bb 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5267,6 +5267,8 @@ static int handle_exception(struct kvm_vcpu *vcpu) return 1; } er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); + if (er == EMULATE_USER_EXIT) + return 0; if (er != EMULATE_DONE) kvm_queue_exception(vcpu, UD_VECTOR); return 1; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3ffd5900da5b..df81717a92f3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1812,6 +1812,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) */ BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); + if (guest_hv_clock.version & 1) + ++guest_hv_clock.version; /* first time write, random junk */ + vcpu->hv_clock.version = guest_hv_clock.version + 1; kvm_write_guest_cached(v->kvm, &vcpu->pv_time, &vcpu->hv_clock, @@ -5426,6 +5429,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, emulation_type)) return EMULATE_DONE; + if (ctxt->have_exception && inject_emulated_exception(vcpu)) + return EMULATE_DONE; if (emulation_type & EMULTYPE_SKIP) return EMULATE_FAIL; return handle_emulation_failure(vcpu); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index b599a780a5a9..a0fe62e3f4a3 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -911,15 +911,10 @@ static void populate_pte(struct cpa_data *cpa, pte = pte_offset_kernel(pmd, start); while (num_pages-- && start < end) { - - /* deal with the NX bit */ - if (!(pgprot_val(pgprot) & _PAGE_NX)) - cpa->pfn &= ~_PAGE_NX; - - set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot)); + set_pte(pte, pfn_pte(cpa->pfn, pgprot)); start += PAGE_SIZE; - cpa->pfn += PAGE_SIZE; + cpa->pfn++; pte++; } } @@ -975,11 +970,11 @@ static int populate_pmd(struct cpa_data *cpa, pmd = pmd_offset(pud, start); - set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | + set_pmd(pmd, __pmd(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | massage_pgprot(pmd_pgprot))); start += PMD_SIZE; - cpa->pfn += PMD_SIZE; + cpa->pfn += PMD_SIZE >> PAGE_SHIFT; cur_pages += PMD_SIZE >> PAGE_SHIFT; } @@ -1048,11 +1043,11 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, * Map everything starting from the Gb boundary, possibly with 1G pages */ while (end - start >= PUD_SIZE) { - set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | + set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | massage_pgprot(pud_pgprot))); start += PUD_SIZE; - cpa->pfn += PUD_SIZE; + cpa->pfn += PUD_SIZE >> PAGE_SHIFT; cur_pages += PUD_SIZE >> PAGE_SHIFT; pud++; } diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index ea48449b2e63..64fbc7e33226 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -28,8 +28,7 @@ struct bmp_header { void __init efi_bgrt_init(void) { acpi_status status; - void __iomem *image; - bool ioremapped = false; + void *image; struct bmp_header bmp_header; if (acpi_disabled) @@ -70,20 +69,14 @@ void __init efi_bgrt_init(void) return; } - image = efi_lookup_mapped_addr(bgrt_tab->image_address); + image = memremap(bgrt_tab->image_address, sizeof(bmp_header), MEMREMAP_WB); if (!image) { - image = early_ioremap(bgrt_tab->image_address, - sizeof(bmp_header)); - ioremapped = true; - if (!image) { - pr_err("Ignoring BGRT: failed to map image header memory\n"); - return; - } + pr_err("Ignoring BGRT: failed to map image header memory\n"); + return; } - memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); - if (ioremapped) - early_iounmap(image, sizeof(bmp_header)); + memcpy(&bmp_header, image, sizeof(bmp_header)); + memunmap(image); bgrt_image_size = bmp_header.size; bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN); @@ -93,18 +86,14 @@ void __init efi_bgrt_init(void) return; } - if (ioremapped) { - image = early_ioremap(bgrt_tab->image_address, - bmp_header.size); - if (!image) { - pr_err("Ignoring BGRT: failed to map image memory\n"); - kfree(bgrt_image); - bgrt_image = NULL; - return; - } + image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB); + if (!image) { + pr_err("Ignoring BGRT: failed to map image memory\n"); + kfree(bgrt_image); + bgrt_image = NULL; + return; } - memcpy_fromio(bgrt_image, image, bgrt_image_size); - if (ioremapped) - early_iounmap(image, bmp_header.size); + memcpy(bgrt_image, image, bgrt_image_size); + memunmap(image); } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index ad285404ea7f..3c1f3cd7b2ba 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -869,7 +869,7 @@ static void __init kexec_enter_virtual_mode(void) * This function will switch the EFI runtime services to virtual mode. * Essentially, we look through the EFI memmap and map every region that * has the runtime attribute bit set in its memory descriptor into the - * ->trampoline_pgd page table using a top-down VA allocation scheme. + * efi_pgd page table. * * The old method which used to update that memory descriptor with the * virtual address obtained from ioremap() is still supported when the @@ -879,8 +879,8 @@ static void __init kexec_enter_virtual_mode(void) * * The new method does a pagetable switch in a preemption-safe manner * so that we're in a different address space when calling a runtime - * function. For function arguments passing we do copy the PGDs of the - * kernel page table into ->trampoline_pgd prior to each call. + * function. For function arguments passing we do copy the PUDs of the + * kernel page table into efi_pgd prior to each call. * * Specially for kexec boot, efi runtime maps in previous kernel should * be passed in via setup_data. In that case runtime ranges will be mapped @@ -895,6 +895,12 @@ static void __init __efi_enter_virtual_mode(void) efi.systab = NULL; + if (efi_alloc_page_tables()) { + pr_err("Failed to allocate EFI page tables\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + efi_merge_regions(); new_memmap = efi_map_regions(&count, &pg_shift); if (!new_memmap) { @@ -954,28 +960,11 @@ static void __init __efi_enter_virtual_mode(void) efi_runtime_mkexec(); /* - * We mapped the descriptor array into the EFI pagetable above but we're - * not unmapping it here. Here's why: - * - * We're copying select PGDs from the kernel page table to the EFI page - * table and when we do so and make changes to those PGDs like unmapping - * stuff from them, those changes appear in the kernel page table and we - * go boom. - * - * From setup_real_mode(): - * - * ... - * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; - * - * In this particular case, our allocation is in PGD 0 of the EFI page - * table but we've copied that PGD from PGD[272] of the EFI page table: - * - * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272 - * - * where the direct memory mapping in kernel space is. - * - * new_memmap's VA comes from that direct mapping and thus clearing it, - * it would get cleared in the kernel page table too. + * We mapped the descriptor array into the EFI pagetable above + * but we're not unmapping it here because if we're running in + * EFI mixed mode we need all of memory to be accessible when + * we pass parameters to the EFI runtime services in the + * thunking code. * * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift); */ diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index ed5b67338294..58d669bc8250 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -38,6 +38,11 @@ * say 0 - 3G. */ +int __init efi_alloc_page_tables(void) +{ + return 0; +} + void efi_sync_low_kernel_mappings(void) {} void __init efi_dump_pagetable(void) {} int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index a0ac0f9c307f..18dfaad71c99 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -40,6 +40,7 @@ #include #include #include +#include /* * We allocate runtime services regions bottom-up, starting from -4G, i.e. @@ -47,16 +48,7 @@ */ static u64 efi_va = EFI_VA_START; -/* - * Scratch space used for switching the pagetable in the EFI stub - */ -struct efi_scratch { - u64 r15; - u64 prev_cr3; - pgd_t *efi_pgt; - bool use_pgd; - u64 phys_stack; -} __packed; +struct efi_scratch efi_scratch; static void __init early_code_mapping_set_exec(int executable) { @@ -83,8 +75,11 @@ pgd_t * __init efi_call_phys_prolog(void) int pgd; int n_pgds; - if (!efi_enabled(EFI_OLD_MEMMAP)) - return NULL; + if (!efi_enabled(EFI_OLD_MEMMAP)) { + save_pgd = (pgd_t *)read_cr3(); + write_cr3((unsigned long)efi_scratch.efi_pgt); + goto out; + } early_code_mapping_set_exec(1); @@ -96,6 +91,7 @@ pgd_t * __init efi_call_phys_prolog(void) vaddress = (unsigned long)__va(pgd * PGDIR_SIZE); set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress)); } +out: __flush_tlb_all(); return save_pgd; @@ -109,8 +105,11 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) int pgd_idx; int nr_pgds; - if (!save_pgd) + if (!efi_enabled(EFI_OLD_MEMMAP)) { + write_cr3((unsigned long)save_pgd); + __flush_tlb_all(); return; + } nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); @@ -123,27 +122,97 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) early_code_mapping_set_exec(0); } +static pgd_t *efi_pgd; + +/* + * We need our own copy of the higher levels of the page tables + * because we want to avoid inserting EFI region mappings (EFI_VA_END + * to EFI_VA_START) into the standard kernel page tables. Everything + * else can be shared, see efi_sync_low_kernel_mappings(). + */ +int __init efi_alloc_page_tables(void) +{ + pgd_t *pgd; + pud_t *pud; + gfp_t gfp_mask; + + if (efi_enabled(EFI_OLD_MEMMAP)) + return 0; + + gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO; + efi_pgd = (pgd_t *)__get_free_page(gfp_mask); + if (!efi_pgd) + return -ENOMEM; + + pgd = efi_pgd + pgd_index(EFI_VA_END); + + pud = pud_alloc_one(NULL, 0); + if (!pud) { + free_page((unsigned long)efi_pgd); + return -ENOMEM; + } + + pgd_populate(NULL, pgd, pud); + + return 0; +} + /* * Add low kernel mappings for passing arguments to EFI functions. */ void efi_sync_low_kernel_mappings(void) { - unsigned num_pgds; - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + unsigned num_entries; + pgd_t *pgd_k, *pgd_efi; + pud_t *pud_k, *pud_efi; if (efi_enabled(EFI_OLD_MEMMAP)) return; - num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET); + /* + * We can share all PGD entries apart from the one entry that + * covers the EFI runtime mapping space. + * + * Make sure the EFI runtime region mappings are guaranteed to + * only span a single PGD entry and that the entry also maps + * other important kernel regions. + */ + BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); + BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != + (EFI_VA_END & PGDIR_MASK)); + + pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); + pgd_k = pgd_offset_k(PAGE_OFFSET); - memcpy(pgd + pgd_index(PAGE_OFFSET), - init_mm.pgd + pgd_index(PAGE_OFFSET), - sizeof(pgd_t) * num_pgds); + num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET); + memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); + + /* + * We share all the PUD entries apart from those that map the + * EFI regions. Copy around them. + */ + BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0); + BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0); + + pgd_efi = efi_pgd + pgd_index(EFI_VA_END); + pud_efi = pud_offset(pgd_efi, 0); + + pgd_k = pgd_offset_k(EFI_VA_END); + pud_k = pud_offset(pgd_k, 0); + + num_entries = pud_index(EFI_VA_END); + memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + + pud_efi = pud_offset(pgd_efi, EFI_VA_START); + pud_k = pud_offset(pgd_k, EFI_VA_START); + + num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START); + memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); } int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { - unsigned long text; + unsigned long pfn, text; struct page *page; unsigned npages; pgd_t *pgd; @@ -151,8 +220,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) if (efi_enabled(EFI_OLD_MEMMAP)) return 0; - efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; - pgd = __va(efi_scratch.efi_pgt); + efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd); + pgd = efi_pgd; /* * It can happen that the physical address of new_memmap lands in memory @@ -160,7 +229,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * and ident-map those pages containing the map before calling * phys_efi_set_virtual_address_map(). */ - if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) { + pfn = pa_memmap >> PAGE_SHIFT; + if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX)) { pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); return 1; } @@ -185,8 +255,9 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) npages = (_end - _text) >> PAGE_SHIFT; text = __pa(_text); + pfn = text >> PAGE_SHIFT; - if (kernel_map_pages_in_pgd(pgd, text >> PAGE_SHIFT, text, npages, 0)) { + if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, 0)) { pr_err("Failed to map kernel text 1:1\n"); return 1; } @@ -196,20 +267,20 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) { - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); - - kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages); + kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages); } static void __init __map_region(efi_memory_desc_t *md, u64 va) { - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); - unsigned long pf = 0; + unsigned long flags = 0; + unsigned long pfn; + pgd_t *pgd = efi_pgd; if (!(md->attribute & EFI_MEMORY_WB)) - pf |= _PAGE_PCD; + flags |= _PAGE_PCD; - if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf)) + pfn = md->phys_addr >> PAGE_SHIFT; + if (kernel_map_pages_in_pgd(pgd, pfn, va, md->num_pages, flags)) pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", md->phys_addr, va); } @@ -312,9 +383,7 @@ void __init efi_runtime_mkexec(void) void __init efi_dump_pagetable(void) { #ifdef CONFIG_EFI_PGT_DUMP - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); - - ptdump_walk_pgd_level(NULL, pgd); + ptdump_walk_pgd_level(NULL, efi_pgd); #endif } diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 86d0f9e08dd9..32020cb8bb08 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -38,41 +38,6 @@ mov %rsi, %cr0; \ mov (%rsp), %rsp - /* stolen from gcc */ - .macro FLUSH_TLB_ALL - movq %r15, efi_scratch(%rip) - movq %r14, efi_scratch+8(%rip) - movq %cr4, %r15 - movq %r15, %r14 - andb $0x7f, %r14b - movq %r14, %cr4 - movq %r15, %cr4 - movq efi_scratch+8(%rip), %r14 - movq efi_scratch(%rip), %r15 - .endm - - .macro SWITCH_PGT - cmpb $0, efi_scratch+24(%rip) - je 1f - movq %r15, efi_scratch(%rip) # r15 - # save previous CR3 - movq %cr3, %r15 - movq %r15, efi_scratch+8(%rip) # prev_cr3 - movq efi_scratch+16(%rip), %r15 # EFI pgt - movq %r15, %cr3 - 1: - .endm - - .macro RESTORE_PGT - cmpb $0, efi_scratch+24(%rip) - je 2f - movq efi_scratch+8(%rip), %r15 - movq %r15, %cr3 - movq efi_scratch(%rip), %r15 - FLUSH_TLB_ALL - 2: - .endm - ENTRY(efi_call) SAVE_XMM mov (%rsp), %rax @@ -83,16 +48,8 @@ ENTRY(efi_call) mov %r8, %r9 mov %rcx, %r8 mov %rsi, %rcx - SWITCH_PGT call *%rdi - RESTORE_PGT addq $48, %rsp RESTORE_XMM ret ENDPROC(efi_call) - - .data -ENTRY(efi_scratch) - .fill 3,8,0 - .byte 0 - .quad 0 diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 3b52677f459a..0cd8f039602e 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -325,38 +325,6 @@ u64 __init efi_mem_desc_end(efi_memory_desc_t *md) return end; } -/* - * We can't ioremap data in EFI boot services RAM, because we've already mapped - * it as RAM. So, look it up in the existing EFI memory map instead. Only - * callable after efi_enter_virtual_mode and before efi_free_boot_services. - */ -void __iomem *efi_lookup_mapped_addr(u64 phys_addr) -{ - struct efi_memory_map *map; - void *p; - map = efi.memmap; - if (!map) - return NULL; - if (WARN_ON(!map->map)) - return NULL; - for (p = map->map; p < map->map_end; p += map->desc_size) { - efi_memory_desc_t *md = p; - u64 size = md->num_pages << EFI_PAGE_SHIFT; - u64 end = md->phys_addr + size; - if (!(md->attribute & EFI_MEMORY_RUNTIME) && - md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) - continue; - if (!md->virt_addr) - continue; - if (phys_addr >= md->phys_addr && phys_addr < end) { - phys_addr += md->virt_addr - md->phys_addr; - return (__force void __iomem *)(unsigned long)phys_addr; - } - } - return NULL; -} - static __initdata efi_config_table_type_t common_tables[] = { {ACPI_20_TABLE_GUID, "ACPI 2.0", &efi.acpi20}, {ACPI_TABLE_GUID, "ACPI", &efi.acpi}, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index f4cae5357e40..3e90ddcbb24a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1575,34 +1575,32 @@ void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev) WREG32(mmBIOS_SCRATCH_0 + i, adev->bios_scratch[i]); } -/* Atom needs data in little endian format - * so swap as appropriate when copying data to - * or from atom. Note that atom operates on - * dw units. +/* Atom needs data in little endian format so swap as appropriate when copying + * data to or from atom. Note that atom operates on dw units. + * + * Use to_le=true when sending data to atom and provide at least + * ALIGN(num_bytes,4) bytes in the dst buffer. + * + * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4) + * byes in the src buffer. */ void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) { #ifdef __BIG_ENDIAN - u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */ - u32 *dst32, *src32; + u32 src_tmp[5], dst_tmp[5]; int i; + u8 align_num_bytes = ALIGN(num_bytes, 4); - memcpy(src_tmp, src, num_bytes); - src32 = (u32 *)src_tmp; - dst32 = (u32 *)dst_tmp; if (to_le) { - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = cpu_to_le32(src32[i]); - memcpy(dst, dst_tmp, num_bytes); + memcpy(src_tmp, src, num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = cpu_to_le32(src_tmp[i]); + memcpy(dst, dst_tmp, align_num_bytes); } else { - u8 dws = num_bytes & ~3; - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = le32_to_cpu(src32[i]); - memcpy(dst, dst_tmp, dws); - if (num_bytes % 4) { - for (i = 0; i < (num_bytes % 4); i++) - dst[dws+i] = dst_tmp[dws+i]; - } + memcpy(src_tmp, src, align_num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = le32_to_cpu(src_tmp[i]); + memcpy(dst, dst_tmp, num_bytes); } #else memcpy(dst, src, num_bytes); diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index f3bee54c414f..cb4313c68f71 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -440,7 +440,9 @@ static bool gmbus_is_index_read(struct i2c_msg *msgs, int i, int num) { return (i + 1 < num && - !(msgs[i].flags & I2C_M_RD) && msgs[i].len <= 2 && + msgs[i].addr == msgs[i + 1].addr && + !(msgs[i].flags & I2C_M_RD) && + (msgs[i].len == 1 || msgs[i].len == 2) && (msgs[i + 1].flags & I2C_M_RD)); } diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index f97b73ec4713..f418c002d323 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -352,6 +352,7 @@ static int panel_simple_remove(struct device *dev) drm_panel_remove(&panel->base); panel_simple_disable(&panel->base); + panel_simple_unprepare(&panel->base); if (panel->ddc) put_device(&panel->ddc->dev); @@ -367,6 +368,7 @@ static void panel_simple_shutdown(struct device *dev) struct panel_simple *panel = dev_get_drvdata(dev); panel_simple_disable(&panel->base); + panel_simple_unprepare(&panel->base); } static const struct drm_display_mode ampire_am800480r3tmqwa1h_mode = { diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index b5760851195c..0c6216a6ee9e 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -45,34 +45,32 @@ static char *pre_emph_names[] = { /***** radeon AUX functions *****/ -/* Atom needs data in little endian format - * so swap as appropriate when copying data to - * or from atom. Note that atom operates on - * dw units. +/* Atom needs data in little endian format so swap as appropriate when copying + * data to or from atom. Note that atom operates on dw units. + * + * Use to_le=true when sending data to atom and provide at least + * ALIGN(num_bytes,4) bytes in the dst buffer. + * + * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4) + * byes in the src buffer. */ void radeon_atom_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) { #ifdef __BIG_ENDIAN - u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */ - u32 *dst32, *src32; + u32 src_tmp[5], dst_tmp[5]; int i; + u8 align_num_bytes = ALIGN(num_bytes, 4); - memcpy(src_tmp, src, num_bytes); - src32 = (u32 *)src_tmp; - dst32 = (u32 *)dst_tmp; if (to_le) { - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = cpu_to_le32(src32[i]); - memcpy(dst, dst_tmp, num_bytes); + memcpy(src_tmp, src, num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = cpu_to_le32(src_tmp[i]); + memcpy(dst, dst_tmp, align_num_bytes); } else { - u8 dws = num_bytes & ~3; - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = le32_to_cpu(src32[i]); - memcpy(dst, dst_tmp, dws); - if (num_bytes % 4) { - for (i = 0; i < (num_bytes % 4); i++) - dst[dws+i] = dst_tmp[dws+i]; - } + memcpy(src_tmp, src, align_num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = le32_to_cpu(src_tmp[i]); + memcpy(dst, dst_tmp, num_bytes); } #else memcpy(dst, src, num_bytes); diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 26da2f4d7b4f..a2937a693591 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -226,7 +226,6 @@ static int radeonfb_create(struct drm_fb_helper *helper, } info->par = rfbdev; - info->skip_vt_switch = true; ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj); if (ret) { diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index ea47980949ef..4d46f2ce606f 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -479,7 +479,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve, if (b == -1) goto err; - k->ptr[i] = PTR(ca->buckets[b].gen, + k->ptr[i] = MAKE_PTR(ca->buckets[b].gen, bucket_to_sector(c, b), ca->sb.nr_this_dev); diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 243de0bf15cd..4bf15182c4da 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -584,7 +584,7 @@ static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey return false; for (i = 0; i < KEY_PTRS(l); i++) - if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || + if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i)) return false; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 29eba7219b01..6ed066a0e7c0 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -508,7 +508,7 @@ static void journal_reclaim(struct cache_set *c) continue; ja->cur_idx = next; - k->ptr[n++] = PTR(0, + k->ptr[n++] = MAKE_PTR(0, bucket_to_sector(c, ca->sb.d[ja->cur_idx]), ca->sb.nr_this_dev); } diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 5d7c0900fa1b..f112c5bc082a 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -257,6 +257,9 @@ static ssize_t at24_read(struct at24_data *at24, if (unlikely(!count)) return count; + if (off + count > at24->chip.byte_len) + return -EINVAL; + /* * Read data from chip, protecting against concurrent updates * from this host, but not from other I2C masters. @@ -311,6 +314,9 @@ static ssize_t at24_eeprom_write(struct at24_data *at24, const char *buf, unsigned long timeout, write_time; unsigned next_page; + if (offset + count > at24->chip.byte_len) + return -EINVAL; + /* Get corresponding I2C address and adjust offset */ client = at24_translate_offset(at24, &offset); diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index 972ff844cf5a..cf7c7bc1e940 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -155,6 +155,9 @@ static int mmc_bus_suspend(struct device *dev) return ret; ret = host->bus_ops->suspend(host); + if (ret) + pm_generic_resume(dev); + return ret; } diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 54ab48827258..7ba109e8cf88 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2663,15 +2663,18 @@ static int panic_nand_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const uint8_t *buf) { struct nand_chip *chip = mtd->priv; + int chipnr = (int)(to >> chip->chip_shift); struct mtd_oob_ops ops; int ret; - /* Wait for the device to get ready */ - panic_nand_wait(mtd, chip, 400); - /* Grab the device */ panic_nand_get_device(chip, mtd, FL_WRITING); + chip->select_chip(mtd, chipnr); + + /* Wait for the device to get ready */ + panic_nand_wait(mtd, chip, 400); + memset(&ops, 0, sizeof(ops)); ops.len = len; ops.datbuf = (uint8_t *)buf; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c36a03fa7678..260f94b019c9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3361,13 +3361,6 @@ again: goto again; } - /* We've already setup this transaction, go ahead and exit */ - if (block_group->cache_generation == trans->transid && - i_size_read(inode)) { - dcs = BTRFS_DC_SETUP; - goto out_put; - } - /* * We want to set the generation to 0, that way if anything goes wrong * from here on out we know not to trust this cache when we load up next @@ -3391,6 +3384,13 @@ again: } WARN_ON(ret); + /* We've already setup this transaction, go ahead and exit */ + if (block_group->cache_generation == trans->transid && + i_size_read(inode)) { + dcs = BTRFS_DC_SETUP; + goto out_put; + } + if (i_size_read(inode) > 0) { ret = btrfs_check_trunc_cache_free_space(root, &root->fs_info->global_block_rsv); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 348e0a05bd18..44e09483d2cd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1260,7 +1260,7 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) return 0; } - error = nfs_revalidate_inode(NFS_SERVER(inode), inode); + error = nfs_lookup_verify_inode(inode, flags); dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n", __func__, inode->i_ino, error ? "invalid" : "valid"); return !error; @@ -1420,6 +1420,7 @@ static int nfs4_lookup_revalidate(struct dentry *, unsigned int); const struct dentry_operations nfs4_dentry_operations = { .d_revalidate = nfs4_lookup_revalidate, + .d_weak_revalidate = nfs_weak_revalidate, .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, .d_automount = nfs_d_automount, diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 421935f3d909..11c67e8b939d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3379,7 +3379,9 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) /* ignore lock owners */ if (local->st_stateowner->so_is_open_owner == 0) continue; - if (local->st_stateowner == &oo->oo_owner) { + if (local->st_stateowner != &oo->oo_owner) + continue; + if (local->st_stid.sc_type == NFS4_OPEN_STID) { ret = local; atomic_inc(&ret->st_stid.sc_count); break; @@ -3388,6 +3390,52 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) return ret; } +static __be32 +nfsd4_verify_open_stid(struct nfs4_stid *s) +{ + __be32 ret = nfs_ok; + + switch (s->sc_type) { + default: + break; + case NFS4_CLOSED_STID: + case NFS4_CLOSED_DELEG_STID: + ret = nfserr_bad_stateid; + break; + case NFS4_REVOKED_DELEG_STID: + ret = nfserr_deleg_revoked; + } + return ret; +} + +/* Lock the stateid st_mutex, and deal with races with CLOSE */ +static __be32 +nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp) +{ + __be32 ret; + + mutex_lock(&stp->st_mutex); + ret = nfsd4_verify_open_stid(&stp->st_stid); + if (ret != nfs_ok) + mutex_unlock(&stp->st_mutex); + return ret; +} + +static struct nfs4_ol_stateid * +nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) +{ + struct nfs4_ol_stateid *stp; + for (;;) { + spin_lock(&fp->fi_lock); + stp = nfsd4_find_existing_open(fp, open); + spin_unlock(&fp->fi_lock); + if (!stp || nfsd4_lock_ol_stateid(stp) == nfs_ok) + break; + nfs4_put_stid(&stp->st_stid); + } + return stp; +} + static struct nfs4_openowner * alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, struct nfsd4_compound_state *cstate) @@ -3420,23 +3468,27 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, } static struct nfs4_ol_stateid * -init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, - struct nfsd4_open *open) +init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open) { struct nfs4_openowner *oo = open->op_openowner; struct nfs4_ol_stateid *retstp = NULL; + struct nfs4_ol_stateid *stp; + stp = open->op_stp; /* We are moving these outside of the spinlocks to avoid the warnings */ mutex_init(&stp->st_mutex); mutex_lock(&stp->st_mutex); +retry: spin_lock(&oo->oo_owner.so_client->cl_lock); spin_lock(&fp->fi_lock); retstp = nfsd4_find_existing_open(fp, open); if (retstp) goto out_unlock; + + open->op_stp = NULL; atomic_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_locks); @@ -3453,11 +3505,16 @@ out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&oo->oo_owner.so_client->cl_lock); if (retstp) { - mutex_lock(&retstp->st_mutex); - /* Not that we need to, just for neatness */ + /* Handle races with CLOSE */ + if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) { + nfs4_put_stid(&retstp->st_stid); + goto retry; + } + /* To keep mutex tracking happy */ mutex_unlock(&stp->st_mutex); + stp = retstp; } - return retstp; + return stp; } /* @@ -4260,9 +4317,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; struct nfs4_file *fp = NULL; struct nfs4_ol_stateid *stp = NULL; - struct nfs4_ol_stateid *swapstp = NULL; struct nfs4_delegation *dp = NULL; __be32 status; + bool new_stp = false; /* * Lookup file; if found, lookup stateid and check open request, @@ -4274,9 +4331,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; - spin_lock(&fp->fi_lock); - stp = nfsd4_find_existing_open(fp, open); - spin_unlock(&fp->fi_lock); + stp = nfsd4_find_and_lock_existing_open(fp, open); } else { open->op_file = NULL; status = nfserr_bad_stateid; @@ -4284,41 +4339,31 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf goto out; } + if (!stp) { + stp = init_open_stateid(fp, open); + if (!open->op_stp) + new_stp = true; + } + /* * OPEN the file, or upgrade an existing OPEN. * If truncate fails, the OPEN fails. + * + * stp is already locked. */ - if (stp) { + if (!new_stp) { /* Stateid was found, this is an OPEN upgrade */ - mutex_lock(&stp->st_mutex); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { mutex_unlock(&stp->st_mutex); goto out; } } else { - stp = open->op_stp; - open->op_stp = NULL; - /* - * init_open_stateid() either returns a locked stateid - * it found, or initializes and locks the new one we passed in - */ - swapstp = init_open_stateid(stp, fp, open); - if (swapstp) { - nfs4_put_stid(&stp->st_stid); - stp = swapstp; - status = nfs4_upgrade_open(rqstp, fp, current_fh, - stp, open); - if (status) { - mutex_unlock(&stp->st_mutex); - goto out; - } - goto upgrade_out; - } status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { - mutex_unlock(&stp->st_mutex); + stp->st_stid.sc_type = NFS4_CLOSED_STID; release_open_stateid(stp); + mutex_unlock(&stp->st_mutex); goto out; } @@ -4327,7 +4372,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (stp->st_clnt_odstate == open->op_odstate) open->op_odstate = NULL; } -upgrade_out: + nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid); mutex_unlock(&stp->st_mutex); @@ -5153,7 +5198,6 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) bool unhashed; LIST_HEAD(reaplist); - s->st_stid.sc_type = NFS4_CLOSED_STID; spin_lock(&clp->cl_lock); unhashed = unhash_open_stateid(s, &reaplist); @@ -5192,10 +5236,12 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_bump_seqid(cstate, status); if (status) goto out; + + stp->st_stid.sc_type = NFS4_CLOSED_STID; nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); - mutex_unlock(&stp->st_mutex); nfsd4_close_open_stateid(stp); + mutex_unlock(&stp->st_mutex); /* put reference from nfs4_preprocess_seqid_op */ nfs4_put_stid(&stp->st_stid); diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 639e9b8b0e4d..0b41959aab9f 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -131,6 +131,7 @@ netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask) struct netlink_callback { struct sk_buff *skb; const struct nlmsghdr *nlh; + int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); @@ -153,6 +154,7 @@ struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags); struct netlink_dump_control { + int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff *skb, struct netlink_callback *); int (*done)(struct netlink_callback *); void *data; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 1b6b6dcb018d..43c0e771f417 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -114,6 +114,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) * @flags: flags * @policy: attribute validation policy * @doit: standard command callback + * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps * @ops_list: operations list @@ -122,6 +123,7 @@ struct genl_ops { const struct nla_policy *policy; int (*doit)(struct sk_buff *skb, struct genl_info *info); + int (*start)(struct netlink_callback *cb); int (*dumpit)(struct sk_buff *skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index 22b6ad31c706..8562b1cb776b 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -90,7 +90,7 @@ PTR_FIELD(PTR_GEN, 0, 8) #define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1) -#define PTR(gen, offset, dev) \ +#define MAKE_PTR(gen, offset, dev) \ ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen) /* Bkey utility code */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 6c6f5ccfcda1..8f3769ec8575 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1304,17 +1304,11 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, VM_BUG_ON_PAGE(!PageHead(page), page); if (flags & FOLL_TOUCH) { pmd_t _pmd; - /* - * We should set the dirty bit only for FOLL_WRITE but - * for now the dirty bit in the pmd is meaningless. - * And if the dirty bit will become meaningful and - * we'll only set it with FOLL_WRITE, an atomic - * set_bit will be required on the pmd to set the - * young bit, instead of the current set_pmd_at. - */ - _pmd = pmd_mkyoung(pmd_mkdirty(*pmd)); + _pmd = pmd_mkyoung(*pmd); + if (flags & FOLL_WRITE) + _pmd = pmd_mkdirty(_pmd); if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK, - pmd, _pmd, 1)) + pmd, _pmd, flags & FOLL_WRITE)) update_mmu_cache_pmd(vma, addr, pmd); } if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { diff --git a/mm/madvise.c b/mm/madvise.c index c889fcbb530e..2a0f9a4504f1 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -223,15 +223,14 @@ static long madvise_willneed(struct vm_area_struct *vma, { struct file *file = vma->vm_file; + *prev = vma; #ifdef CONFIG_SWAP if (!file) { - *prev = vma; force_swapin_readahead(vma, start, end); return 0; } if (shmem_mapping(file->f_mapping)) { - *prev = vma; force_shm_swapin_readahead(vma, start, end, file->f_mapping); return 0; @@ -246,7 +245,6 @@ static long madvise_willneed(struct vm_area_struct *vma, return 0; } - *prev = vma; start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; if (end > vma->vm_end) end = vma->vm_end; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9ecdd61c6463..a87afc4f3c91 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2203,6 +2203,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, cb = &nlk->cb; memset(cb, 0, sizeof(*cb)); + cb->start = control->start; cb->dump = control->dump; cb->done = control->done; cb->nlh = nlh; @@ -2216,6 +2217,9 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, mutex_unlock(nlk->cb_mutex); + if (cb->start) + cb->start(cb); + ret = netlink_dump(sk); sock_put(sk); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index bc0e504f33a6..8e63662c6fb0 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -513,6 +513,20 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, } EXPORT_SYMBOL(genlmsg_put); +static int genl_lock_start(struct netlink_callback *cb) +{ + /* our ops are always const - netlink API doesn't propagate that */ + const struct genl_ops *ops = cb->data; + int rc = 0; + + if (ops->start) { + genl_lock(); + rc = ops->start(cb); + genl_unlock(); + } + return rc; +} + static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { /* our ops are always const - netlink API doesn't propagate that */ @@ -577,6 +591,7 @@ static int genl_family_rcv_msg(struct genl_family *family, .module = family->module, /* we have const, but the netlink API doesn't */ .data = (void *)ops, + .start = genl_lock_start, .dump = genl_lock_dumpit, .done = genl_lock_done, }; @@ -588,6 +603,7 @@ static int genl_family_rcv_msg(struct genl_family *family, } else { struct netlink_dump_control c = { .module = family->module, + .start = ops->start, .dump = ops->dumpit, .done = ops->done, }; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7a5a64e70b4d..76944a4839a5 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1652,32 +1652,34 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr static int xfrm_dump_policy_done(struct netlink_callback *cb) { - struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args; struct net *net = sock_net(cb->skb->sk); xfrm_policy_walk_done(walk, net); return 0; } +static int xfrm_dump_policy_start(struct netlink_callback *cb) +{ + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args; + + BUILD_BUG_ON(sizeof(*walk) > sizeof(cb->args)); + + xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY); + return 0; +} + static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args; struct xfrm_dump_info info; - BUILD_BUG_ON(sizeof(struct xfrm_policy_walk) > - sizeof(cb->args) - sizeof(cb->args[0])); - info.in_skb = cb->skb; info.out_skb = skb; info.nlmsg_seq = cb->nlh->nlmsg_seq; info.nlmsg_flags = NLM_F_MULTI; - if (!cb->args[0]) { - cb->args[0] = 1; - xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY); - } - (void) xfrm_policy_walk(net, walk, dump_one_policy, &info); return skb->len; @@ -2415,6 +2417,7 @@ static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { static const struct xfrm_link { int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); + int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff *, struct netlink_callback *); int (*done)(struct netlink_callback *); const struct nla_policy *nla_pol; @@ -2428,6 +2431,7 @@ static const struct xfrm_link { [XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_add_policy }, [XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy }, [XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy, + .start = xfrm_dump_policy_start, .dump = xfrm_dump_policy, .done = xfrm_dump_policy_done }, [XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = { .doit = xfrm_alloc_userspi }, @@ -2479,6 +2483,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { struct netlink_dump_control c = { + .start = link->start, .dump = link->dump, .done = link->done, };