From e17b2f114cba5420fb28fa4bfead57d406a16533 Mon Sep 17 00:00:00 2001
From: Ian Campbell
Date: Mon, 20 Jan 2014 11:30:41 +0000
Subject: xen: swiotlb: handle sizeof(dma_addr_t) != sizeof(phys_addr_t)

The use of phys_to_machine and machine_to_phys in the phys<=>bus
conversions causes us to lose the top bits of the DMA address if the
size of a DMA address is not the same as the size of the physical
address.

This can happen in practice on ARM, where foreign pages can be above
4GB even though the local kernel does not have LPAE page tables enabled
(which is entirely reasonable if the guest does not itself have >4GB of
RAM). In this case the kernel still maps the foreign pages at a phys
addr below 4G (as it must), but the resulting DMA address (returned by
the grant map operation) is much higher.

This is analogous to a hardware device which has its view of RAM mapped
up high for some reason.

This patch makes I/O to foreign pages (specifically blkif) work on
32-bit ARM systems with more than 4GB of RAM.

Signed-off-by: Ian Campbell
Signed-off-by: Stefano Stabellini
---
 drivers/xen/swiotlb-xen.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

(limited to 'drivers/xen')

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 1eac0731c349..ebd8f218a788 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -75,14 +75,32 @@ static unsigned long xen_io_tlb_nslabs;
 
 static u64 start_dma_addr;
 
+/*
+ * Both of these functions should avoid PFN_PHYS because phys_addr_t
+ * can be 32bit when dma_addr_t is 64bit leading to a loss in
+ * information if the shift is done before casting to 64bit.
+ */
 static inline dma_addr_t xen_phys_to_bus(phys_addr_t paddr)
 {
-	return phys_to_machine(XPADDR(paddr)).maddr;
+	unsigned long mfn = pfn_to_mfn(PFN_DOWN(paddr));
+	dma_addr_t dma = (dma_addr_t)mfn << PAGE_SHIFT;
+
+	dma |= paddr & ~PAGE_MASK;
+
+	return dma;
 }
 
 static inline phys_addr_t xen_bus_to_phys(dma_addr_t baddr)
 {
-	return machine_to_phys(XMADDR(baddr)).paddr;
+	unsigned long pfn = mfn_to_pfn(PFN_DOWN(baddr));
+	dma_addr_t dma = (dma_addr_t)pfn << PAGE_SHIFT;
+	phys_addr_t paddr = dma;
+
+	BUG_ON(paddr != dma); /* truncation has occurred, should never happen */
+
+	paddr |= baddr & ~PAGE_MASK;
+
+	return paddr;
 }
 
 static inline dma_addr_t xen_virt_to_bus(void *address)
--
cgit v1.2.3

From 47c542050d306e50f09512eb6339dbf2fc02fddd Mon Sep 17 00:00:00 2001
From: Julien Grall
Date: Thu, 30 Jan 2014 12:56:34 +0000
Subject: xen/gnttab: Use phys_addr_t to describe the grant frame base address

On ARM, the address size can be 32 bits or 64 bits (if
CONFIG_ARCH_PHYS_ADDR_T_64BIT is enabled). We can't assume that the
grant frame base address will always fit in an unsigned long.

Use phys_addr_t instead of unsigned long as the argument type for
gnttab_setup_auto_xlat_frames.

Signed-off-by: Julien Grall
Signed-off-by: Stefano Stabellini
Acked-by: Ian Campbell
Reviewed-by: David Vrabel
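Both commits above guard against the same pitfall: a page frame number
shifted left while still held in a 32-bit type drops its high bits
before any widening cast can save them. A minimal, self-contained
user-space sketch of the hazard (the MFN value is hypothetical, and
this is not code from either patch; it assumes a typical toolchain
where unsigned int is 32 bits):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	/* A foreign frame above 4GB: MFN 0x123456 corresponds to
	 * machine address 0x123456000, which needs more than 32 bits.
	 * uint32_t is unsigned, so the overflowing shift wraps rather
	 * than being undefined. */
	uint32_t mfn = 0x123456;

	/* Wrong: the shift is evaluated in 32-bit arithmetic, so the
	 * result wraps to 0x23456000 before the cast widens it. */
	uint64_t bad = (uint64_t)(mfn << PAGE_SHIFT);

	/* Right: widen first, then shift, the pattern used by the
	 * patched xen_phys_to_bus(). */
	uint64_t good = (uint64_t)mfn << PAGE_SHIFT;

	printf("bad  = 0x%llx\n", (unsigned long long)bad);  /* 0x23456000 */
	printf("good = 0x%llx\n", (unsigned long long)good); /* 0x123456000 */
	return 0;
}

The BUG_ON() in the patched xen_bus_to_phys() catches the reverse
direction: a machine address that cannot be represented in a 32-bit
phys_addr_t.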
---
 drivers/xen/grant-table.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers/xen')

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 1ce1c40331f3..b84e3ab839aa 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -837,7 +837,7 @@ unsigned int gnttab_max_grant_frames(void)
 }
 EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
 
-int gnttab_setup_auto_xlat_frames(unsigned long addr)
+int gnttab_setup_auto_xlat_frames(phys_addr_t addr)
 {
 	xen_pfn_t *pfn;
 	unsigned int max_nr_gframes = __max_nr_grant_frames();
@@ -849,8 +849,8 @@ int gnttab_setup_auto_xlat_frames(unsigned long addr)
 
 	vaddr = xen_remap(addr, PAGE_SIZE * max_nr_gframes);
 	if (vaddr == NULL) {
-		pr_warn("Failed to ioremap gnttab share frames (addr=0x%08lx)!\n",
-			addr);
+		pr_warn("Failed to ioremap gnttab share frames (addr=%pa)!\n",
+			&addr);
 		return -ENOMEM;
 	}
 	pfn = kcalloc(max_nr_gframes, sizeof(pfn[0]), GFP_KERNEL);
--
cgit v1.2.3

From 08ece5bb2312b4510b161a6ef6682f37f4eac8a1 Mon Sep 17 00:00:00 2001
From: Zoltan Kiss
Date: Thu, 23 Jan 2014 21:23:44 +0000
Subject: xen/grant-table: Avoid m2p_override during mapping

The grant mapping API does m2p_override unnecessarily: only gntdev
needs it; for blkback and the future netback patches it just causes
lock contention, as those pages never go to userspace. Therefore this
series does the following:
- the original functions were renamed to __gnttab_[un]map_refs, with a
  new parameter m2p_override
- based on m2p_override they either follow the original behaviour, or
  just set the private flag and call set_phys_to_machine
- gnttab_[un]map_refs are now wrappers that call __gnttab_[un]map_refs
  with m2p_override false
- a new function gnttab_[un]map_refs_userspace provides the old
  behaviour

It also removes a stray space from page.h and changes ret to 0 if
XENFEAT_auto_translated_physmap, as that is the only possible return
value there.

v2:
- move the storing of the old mfn in page->index to gnttab_map_refs
- move the function header update to a separate patch
v3:
- a new approach to retain the old behaviour where it is needed
- squash the patches into one
v4:
- move out the common bits from the m2p* functions, and pass pfn/mfn
  as parameters
- clear page->private before doing anything with the page, so
  m2p_find_override won't race with this
v5:
- change return value handling in __gnttab_[un]map_refs
- remove a stray space in page.h
- add detail on why ret = 0 now in some places
v6:
- don't pass pfn to the m2p* functions, just get it locally

Signed-off-by: Zoltan Kiss
Suggested-by: David Vrabel
Acked-by: David Vrabel
Acked-by: Stefano Stabellini
Signed-off-by: Konrad Rzeszutek Wilk
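To make the split concrete, here is a hedged sketch of which variant
each kind of caller now picks. The helper names and surrounding code
are hypothetical; only the gnttab_map_refs()/gnttab_map_refs_userspace()
calls and their signatures come from this series:

#include <linux/mm.h>
#include <xen/grant_table.h>

/*
 * Hypothetical kernel-only backend (in the spirit of blkback): the
 * mapped pages never reach userspace, so the lightweight variant that
 * skips the m2p override (and its lock contention) is sufficient.
 */
static int backend_map_batch(struct gnttab_map_grant_ref *map_ops,
			     struct page **pages, unsigned int count)
{
	return gnttab_map_refs(map_ops, pages, count);
}

/*
 * Hypothetical userspace-facing driver (in the spirit of gntdev): user
 * page tables rely on the m2p override, so the _userspace variant
 * preserves the old behaviour, including the kmap_ops handling.
 */
static int userspace_map_batch(struct gnttab_map_grant_ref *map_ops,
			       struct gnttab_map_grant_ref *kmap_ops,
			       struct page **pages, unsigned int count)
{
	return gnttab_map_refs_userspace(map_ops, kmap_ops, pages, count);
}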
---
 drivers/xen/gntdev.c      | 13 ++++---
 drivers/xen/grant-table.c | 89 ++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 84 insertions(+), 18 deletions(-)

(limited to 'drivers/xen')

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 073b4a19a8b0..34a2704fbc88 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -284,8 +284,10 @@ static int map_grant_pages(struct grant_map *map)
 	}
 
 	pr_debug("map %d+%d\n", map->index, map->count);
-	err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL,
-			map->pages, map->count);
+	err = gnttab_map_refs_userspace(map->map_ops,
+					use_ptemod ? map->kmap_ops : NULL,
+					map->pages,
+					map->count);
 	if (err)
 		return err;
 
@@ -315,9 +317,10 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
 		}
 	}
 
-	err = gnttab_unmap_refs(map->unmap_ops + offset,
-			use_ptemod ? map->kmap_ops + offset : NULL, map->pages + offset,
-			pages);
+	err = gnttab_unmap_refs_userspace(map->unmap_ops + offset,
+					  use_ptemod ? map->kmap_ops + offset : NULL,
+					  map->pages + offset,
+					  pages);
 	if (err)
 		return err;

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index b84e3ab839aa..8ee13e2e45e2 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -928,15 +928,17 @@ void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count)
 }
 EXPORT_SYMBOL_GPL(gnttab_batch_copy);
 
-int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
+int __gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 		    struct gnttab_map_grant_ref *kmap_ops,
-		    struct page **pages, unsigned int count)
+		    struct page **pages, unsigned int count,
+		    bool m2p_override)
 {
 	int i, ret;
 	bool lazy = false;
 	pte_t *pte;
-	unsigned long mfn;
+	unsigned long mfn, pfn;
 
+	BUG_ON(kmap_ops && !m2p_override);
 	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count);
 	if (ret)
 		return ret;
@@ -955,10 +957,12 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 			set_phys_to_machine(map_ops[i].host_addr >> PAGE_SHIFT,
 					map_ops[i].dev_bus_addr >> PAGE_SHIFT);
 		}
-		return ret;
+		return 0;
 	}
 
-	if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
+	if (m2p_override &&
+	    !in_interrupt() &&
+	    paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
 		arch_enter_lazy_mmu_mode();
 		lazy = true;
 	}
@@ -975,8 +979,20 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 		} else {
 			mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
 		}
-		ret = m2p_add_override(mfn, pages[i], kmap_ops ?
-				       &kmap_ops[i] : NULL);
+		pfn = page_to_pfn(pages[i]);
+
+		WARN_ON(PagePrivate(pages[i]));
+		SetPagePrivate(pages[i]);
+		set_page_private(pages[i], mfn);
+
+		pages[i]->index = pfn_to_mfn(pfn);
+		if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		if (m2p_override)
+			ret = m2p_add_override(mfn, pages[i], kmap_ops ?
+					       &kmap_ops[i] : NULL);
 		if (ret)
 			goto out;
 	}
@@ -987,15 +1003,32 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 
 	return ret;
 }
+
+int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
+		    struct page **pages, unsigned int count)
+{
+	return __gnttab_map_refs(map_ops, NULL, pages, count, false);
+}
 EXPORT_SYMBOL_GPL(gnttab_map_refs);
 
-int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
+int gnttab_map_refs_userspace(struct gnttab_map_grant_ref *map_ops,
+			      struct gnttab_map_grant_ref *kmap_ops,
+			      struct page **pages, unsigned int count)
+{
+	return __gnttab_map_refs(map_ops, kmap_ops, pages, count, true);
+}
+EXPORT_SYMBOL_GPL(gnttab_map_refs_userspace);
+
+int __gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 		      struct gnttab_map_grant_ref *kmap_ops,
-		      struct page **pages, unsigned int count)
+		      struct page **pages, unsigned int count,
+		      bool m2p_override)
 {
 	int i, ret;
 	bool lazy = false;
+	unsigned long pfn, mfn;
 
+	BUG_ON(kmap_ops && !m2p_override);
 	ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
 	if (ret)
 		return ret;
@@ -1006,17 +1039,33 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 			set_phys_to_machine(unmap_ops[i].host_addr >> PAGE_SHIFT,
 					INVALID_P2M_ENTRY);
 		}
-		return ret;
+		return 0;
 	}
 
-	if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
+	if (m2p_override &&
+	    !in_interrupt() &&
+	    paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
 		arch_enter_lazy_mmu_mode();
 		lazy = true;
 	}
 
 	for (i = 0; i < count; i++) {
-		ret = m2p_remove_override(pages[i], kmap_ops ?
-				       &kmap_ops[i] : NULL);
+		pfn = page_to_pfn(pages[i]);
+		mfn = get_phys_to_machine(pfn);
+		if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		set_page_private(pages[i], INVALID_P2M_ENTRY);
+		WARN_ON(!PagePrivate(pages[i]));
+		ClearPagePrivate(pages[i]);
+		set_phys_to_machine(pfn, pages[i]->index);
+		if (m2p_override)
+			ret = m2p_remove_override(pages[i],
+						  kmap_ops ?
+						  &kmap_ops[i] : NULL,
+						  mfn);
 		if (ret)
 			goto out;
 	}
@@ -1027,8 +1076,22 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 
 	return ret;
 }
+
+int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *map_ops,
+		      struct page **pages, unsigned int count)
+{
+	return __gnttab_unmap_refs(map_ops, NULL, pages, count, false);
+}
 EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
 
+int gnttab_unmap_refs_userspace(struct gnttab_unmap_grant_ref *map_ops,
+				struct gnttab_map_grant_ref *kmap_ops,
+				struct page **pages, unsigned int count)
+{
+	return __gnttab_unmap_refs(map_ops, kmap_ops, pages, count, true);
+}
+EXPORT_SYMBOL_GPL(gnttab_unmap_refs_userspace);
+
 static unsigned nr_status_frames(unsigned nr_grant_frames)
 {
 	BUG_ON(grefs_per_grant_frame == 0);
--
cgit v1.2.3
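For completeness, a sketch of the full map/IO/unmap cycle a kernel-only
consumer can now run without ever touching the m2p override. The helper
name, the BATCH limit, and the I/O placeholder are hypothetical;
gnttab_set_map_op() and gnttab_set_unmap_op() are the existing helpers
from include/xen/grant_table.h, and the two batch calls are the new
wrappers added by this commit:

#include <linux/mm.h>
#include <xen/grant_table.h>

#define BATCH 16

static int do_foreign_io(grant_ref_t *refs, domid_t otherend,
			 struct page **pages, unsigned int count)
{
	struct gnttab_map_grant_ref map_ops[BATCH];
	struct gnttab_unmap_grant_ref unmap_ops[BATCH];
	unsigned int i;
	int err;

	if (count > BATCH)
		return -EINVAL;

	for (i = 0; i < count; i++) {
		unsigned long addr =
			(unsigned long)pfn_to_kaddr(page_to_pfn(pages[i]));
		/* Fill in one map op per granted page. */
		gnttab_set_map_op(&map_ops[i], addr, GNTMAP_host_map,
				  refs[i], otherend);
	}

	/* Wrapper for __gnttab_map_refs(..., m2p_override = false). */
	err = gnttab_map_refs(map_ops, pages, count);
	if (err)
		return err;

	/* ... perform I/O against pages[] here ... */

	for (i = 0; i < count; i++)
		gnttab_set_unmap_op(&unmap_ops[i],
				    (unsigned long)pfn_to_kaddr(page_to_pfn(pages[i])),
				    GNTMAP_host_map, map_ops[i].handle);

	/* Likewise skips the override bookkeeping. */
	return gnttab_unmap_refs(unmap_ops, pages, count);
}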
From bc1b0df59e3fc4573f92bc1aab9652047a0aeaa7 Mon Sep 17 00:00:00 2001
From: Bob Liu
Date: Wed, 22 Jan 2014 14:57:44 +0800
Subject: drivers: xen: deaggressive selfballoon driver

The current xen-selfballoon driver is too aggressive, which may cause
the OOM killer to be triggered more often. E.g. this bug reported by
James: https://lkml.org/lkml/2013/11/21/158

There are two main reasons:
1) The original goal_page didn't consider some pages used by kernel
space, like slab pages and pages used by device drivers.
2) The balloon driver may not give back memory to the guest OS fast
enough when the workload suddenly acquires a lot of physical memory.

In both cases, the guest OS will suffer from memory pressure and the
OOM killer may be triggered.

The fix is to make the xen-selfballoon driver less aggressive by adding
an extra 10% of total RAM pages to goal_page. It's more valuable to
keep the guest system reliable and responsive than to balloon these 10%
of pages out to Xen.

Signed-off-by: Bob Liu
Signed-off-by: Konrad Rzeszutek Wilk
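To see what the new default amounts to, a small stand-alone calculation
(the 1 GiB guest and 4 KiB page size are assumed values; PAGES2MB
matches the macro added in the diff below):

#include <stdio.h>

#define PAGE_SHIFT 12	/* assume 4 KiB pages */
#define PAGES2MB(pages) ((pages) >> (20 - PAGE_SHIFT))

int main(void)
{
	unsigned long totalram_pages = 262144;	/* a 1 GiB guest */
	unsigned long reserve_pages = totalram_pages / 10; /* 26214 pages */

	/* 26214 >> 8 == 102, so the driver reserves roughly 102 MB. */
	printf("selfballoon_reserved_mb = %lu\n", PAGES2MB(reserve_pages));
	return 0;
}

Note that the default is only applied when selfballoon_reserved_mb is
still zero, so an administrator-supplied value is left untouched.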
---
 drivers/xen/xen-selfballoon.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'drivers/xen')

diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 21e18c18c7a1..745ad79c1d8e 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -175,6 +175,7 @@ static void frontswap_selfshrink(void)
 #endif /* CONFIG_FRONTSWAP */
 
 #define MB2PAGES(mb)	((mb) << (20 - PAGE_SHIFT))
+#define PAGES2MB(pages)	((pages) >> (20 - PAGE_SHIFT))
 
 /*
  * Use current balloon size, the goal (vm_committed_as), and hysteresis
@@ -525,6 +526,7 @@ EXPORT_SYMBOL(register_xen_selfballooning);
 int xen_selfballoon_init(bool use_selfballooning, bool use_frontswap_selfshrink)
 {
 	bool enable = false;
+	unsigned long reserve_pages;
 
 	if (!xen_domain())
 		return -ENODEV;
@@ -549,6 +551,26 @@ int xen_selfballoon_init(bool use_selfballooning, bool use_frontswap_selfshrink)
 	if (!enable)
 		return -ENODEV;
 
+	/*
+	 * Give selfballoon_reserved_mb a default value (10% of total ram
+	 * pages) to make selfballoon not so aggressive.
+	 *
+	 * There are mainly two reasons:
+	 * 1) The original goal_page didn't consider some pages used by kernel
+	 * space, like slab pages and memory used by device drivers.
+	 *
+	 * 2) The balloon driver may not give back memory to guest OS fast
+	 * enough when the workload suddenly acquires a lot of physical memory.
+	 *
+	 * In both cases, the guest OS will suffer from memory pressure and
+	 * the OOM killer may be triggered.
+	 * By reserving an extra 10% of total ram pages, we can keep the
+	 * system much more reliable and responsive in some cases.
+	 */
+	if (!selfballoon_reserved_mb) {
+		reserve_pages = totalram_pages / 10;
+		selfballoon_reserved_mb = PAGES2MB(reserve_pages);
+	}
 	schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ);
 
 	return 0;
--
cgit v1.2.3