diff options
author | Pekka Enberg <penberg@kernel.org> | 2010-10-24 19:57:05 +0300 |
---|---|---|
committer | Pekka Enberg <penberg@kernel.org> | 2010-10-24 19:57:05 +0300 |
commit | 6d4121f6c20a0e86231d52f535f1c82423b3326f (patch) | |
tree | 5c235cac699ca86b504850aa663ddadde0455a61 /include | |
parent | 92a5bbc11ff2442a54b2f1d313088c245828ef4e (diff) | |
parent | 35da7a307c535f9c2929cae277f3df425c9f9b1e (diff) |
Merge branch 'master' into for-linus
Conflicts:
include/linux/percpu.h
mm/percpu.c
Diffstat (limited to 'include')
287 files changed, 9035 insertions, 2623 deletions
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index c0786d446a00..984cdc62e30b 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -55,7 +55,7 @@ extern u8 acpi_gbl_permanent_mmap; /* - * Globals that are publically available, allowing for + * Globals that are publicly available, allowing for * run time configuration */ extern u32 acpi_dbg_level; diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index e53347fbf1da..e994197f84b7 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -43,6 +43,7 @@ */ #define atomic_set(v, i) (((v)->counter) = (i)) +#include <linux/irqflags.h> #include <asm/system.h> /** @@ -57,7 +58,7 @@ static inline int atomic_add_return(int i, atomic_t *v) unsigned long flags; int temp; - raw_local_irq_save(flags); /* Don't trace it in a irqsoff handler */ + raw_local_irq_save(flags); /* Don't trace it in an irqsoff handler */ temp = v->counter; temp += i; v->counter = temp; @@ -78,7 +79,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) unsigned long flags; int temp; - raw_local_irq_save(flags); /* Don't trace it in a irqsoff handler */ + raw_local_irq_save(flags); /* Don't trace it in an irqsoff handler */ temp = v->counter; temp -= i; v->counter = temp; @@ -119,14 +120,23 @@ static inline void atomic_dec(atomic_t *v) #define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0) #define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0) -#define atomic_add_unless(v, a, u) \ -({ \ - int c, old; \ - c = atomic_read(v); \ - while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \ - c = old; \ - c != (u); \ -}) +#define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) +#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) + +#define cmpxchg_local(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\ + (unsigned long)(n), sizeof(*(ptr)))) + +#define cmpxchg64_local(ptr, o, n) 
__cmpxchg64_local_generic((ptr), (o), (n)) + +static inline int atomic_add_unless(atomic_t *v, int a, int u) +{ + int c, old; + c = atomic_read(v); + while (c != u && (old = atomic_cmpxchg(v, c, c + a)) != c) + c = old; + return c != u; +} #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) @@ -140,15 +150,6 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) raw_local_irq_restore(flags); } -#define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) -#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) - -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\ - (unsigned long)(n), sizeof(*(ptr)))) - -#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) - /* Assume that atomic operations are already serializing */ #define smp_mb__before_atomic_dec() barrier() #define smp_mb__after_atomic_dec() barrier() diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h index 1914e9742512..110fa700f853 100644 --- a/include/asm-generic/bitops/find.h +++ b/include/asm-generic/bitops/find.h @@ -1,15 +1,50 @@ #ifndef _ASM_GENERIC_BITOPS_FIND_H_ #define _ASM_GENERIC_BITOPS_FIND_H_ -#ifndef CONFIG_GENERIC_FIND_NEXT_BIT +/** + * find_next_bit - find the next set bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + */ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset); +/** + * find_next_zero_bit - find the next cleared bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + */ extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset); -#endif + +#ifdef CONFIG_GENERIC_FIND_FIRST_BIT + +/** + * find_first_bit - find the first 
set bit in a memory region + * @addr: The address to start the search at + * @size: The maximum size to search + * + * Returns the bit number of the first set bit. + */ +extern unsigned long find_first_bit(const unsigned long *addr, + unsigned long size); + +/** + * find_first_zero_bit - find the first cleared bit in a memory region + * @addr: The address to start the search at + * @size: The maximum size to search + * + * Returns the bit number of the first cleared bit. + */ +extern unsigned long find_first_zero_bit(const unsigned long *addr, + unsigned long size); +#else /* CONFIG_GENERIC_FIND_FIRST_BIT */ #define find_first_bit(addr, size) find_next_bit((addr), (size), 0) #define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) +#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ + #endif /*_ASM_GENERIC_BITOPS_FIND_H_ */ diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h index b2ba2fc8829a..2533fddd34a6 100644 --- a/include/asm-generic/cmpxchg-local.h +++ b/include/asm-generic/cmpxchg-local.h @@ -2,6 +2,7 @@ #define __ASM_GENERIC_CMPXCHG_LOCAL_H #include <linux/types.h> +#include <linux/irqflags.h> extern unsigned long wrong_size_cmpxchg(volatile void *ptr); diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h index a70b2d2bfc14..0fc16e3f0bfc 100644 --- a/include/asm-generic/fcntl.h +++ b/include/asm-generic/fcntl.h @@ -122,7 +122,7 @@ struct f_owner_ex { int type; - pid_t pid; + __kernel_pid_t pid; }; /* for F_[GET|SET]FL */ diff --git a/include/asm-generic/hardirq.h b/include/asm-generic/hardirq.h index 62f59080e5cc..04d0a977cd43 100644 --- a/include/asm-generic/hardirq.h +++ b/include/asm-generic/hardirq.h @@ -3,13 +3,13 @@ #include <linux/cache.h> #include <linux/threads.h> -#include <linux/irq.h> typedef struct { unsigned int __softirq_pending; } ____cacheline_aligned irq_cpustat_t; #include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ +#include <linux/irq.h> #ifndef 
ack_bad_irq static inline void ack_bad_irq(unsigned int irq) diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 118601fce92d..3577ca11a0be 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -19,7 +19,9 @@ #include <asm-generic/iomap.h> #endif +#ifndef mmiowb #define mmiowb() do {} while (0) +#endif /*****************************************************************************/ /* @@ -28,39 +30,51 @@ * differently. On the simple architectures, we just read/write the * memory location directly. */ +#ifndef __raw_readb static inline u8 __raw_readb(const volatile void __iomem *addr) { return *(const volatile u8 __force *) addr; } +#endif +#ifndef __raw_readw static inline u16 __raw_readw(const volatile void __iomem *addr) { return *(const volatile u16 __force *) addr; } +#endif +#ifndef __raw_readl static inline u32 __raw_readl(const volatile void __iomem *addr) { return *(const volatile u32 __force *) addr; } +#endif #define readb __raw_readb #define readw(addr) __le16_to_cpu(__raw_readw(addr)) #define readl(addr) __le32_to_cpu(__raw_readl(addr)) +#ifndef __raw_writeb static inline void __raw_writeb(u8 b, volatile void __iomem *addr) { *(volatile u8 __force *) addr = b; } +#endif +#ifndef __raw_writew static inline void __raw_writew(u16 b, volatile void __iomem *addr) { *(volatile u16 __force *) addr = b; } +#endif +#ifndef __raw_writel static inline void __raw_writel(u32 b, volatile void __iomem *addr) { *(volatile u32 __force *) addr = b; } +#endif #define writeb __raw_writeb #define writew(b,addr) __raw_writew(__cpu_to_le16(b),addr) @@ -122,6 +136,7 @@ static inline void outl(u32 b, unsigned long addr) #define outw_p(x, addr) outw((x), (addr)) #define outl_p(x, addr) outl((x), (addr)) +#ifndef insb static inline void insb(unsigned long addr, void *buffer, int count) { if (count) { @@ -132,7 +147,9 @@ static inline void insb(unsigned long addr, void *buffer, int count) } while (--count); } } +#endif +#ifndef insw static 
inline void insw(unsigned long addr, void *buffer, int count) { if (count) { @@ -143,7 +160,9 @@ static inline void insw(unsigned long addr, void *buffer, int count) } while (--count); } } +#endif +#ifndef insl static inline void insl(unsigned long addr, void *buffer, int count) { if (count) { @@ -154,7 +173,9 @@ static inline void insl(unsigned long addr, void *buffer, int count) } while (--count); } } +#endif +#ifndef outsb static inline void outsb(unsigned long addr, const void *buffer, int count) { if (count) { @@ -164,7 +185,9 @@ static inline void outsb(unsigned long addr, const void *buffer, int count) } while (--count); } } +#endif +#ifndef outsw static inline void outsw(unsigned long addr, const void *buffer, int count) { if (count) { @@ -174,7 +197,9 @@ static inline void outsw(unsigned long addr, const void *buffer, int count) } while (--count); } } +#endif +#ifndef outsl static inline void outsl(unsigned long addr, const void *buffer, int count) { if (count) { @@ -184,6 +209,7 @@ static inline void outsl(unsigned long addr, const void *buffer, int count) } while (--count); } } +#endif #ifndef CONFIG_GENERIC_IOMAP #define ioread8(addr) readb(addr) diff --git a/include/asm-generic/ioctls.h b/include/asm-generic/ioctls.h index 8554cb6a81b9..a3216655d657 100644 --- a/include/asm-generic/ioctls.h +++ b/include/asm-generic/ioctls.h @@ -62,7 +62,9 @@ #define TCSETSW2 _IOW('T', 0x2C, struct termios2) #define TCSETSF2 _IOW('T', 0x2D, struct termios2) #define TIOCGRS485 0x542E +#ifndef TIOCSRS485 #define TIOCSRS485 0x542F +#endif #define TIOCGPTN _IOR('T', 0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ #define TIOCSPTLCK _IOW('T', 0x31, int) /* Lock/unlock Pty */ #define TCGETX 0x5432 /* SYS5 TCGETX compatibility */ diff --git a/include/asm-generic/irqflags.h b/include/asm-generic/irqflags.h index 9aebf618275a..1f40d0024cf3 100644 --- a/include/asm-generic/irqflags.h +++ b/include/asm-generic/irqflags.h @@ -5,68 +5,62 @@ * All architectures should 
implement at least the first two functions, * usually inline assembly will be the best way. */ -#ifndef RAW_IRQ_DISABLED -#define RAW_IRQ_DISABLED 0 -#define RAW_IRQ_ENABLED 1 +#ifndef ARCH_IRQ_DISABLED +#define ARCH_IRQ_DISABLED 0 +#define ARCH_IRQ_ENABLED 1 #endif /* read interrupt enabled status */ -#ifndef __raw_local_save_flags -unsigned long __raw_local_save_flags(void); +#ifndef arch_local_save_flags +unsigned long arch_local_save_flags(void); #endif /* set interrupt enabled status */ -#ifndef raw_local_irq_restore -void raw_local_irq_restore(unsigned long flags); +#ifndef arch_local_irq_restore +void arch_local_irq_restore(unsigned long flags); #endif /* get status and disable interrupts */ -#ifndef __raw_local_irq_save -static inline unsigned long __raw_local_irq_save(void) +#ifndef arch_local_irq_save +static inline unsigned long arch_local_irq_save(void) { unsigned long flags; - flags = __raw_local_save_flags(); - raw_local_irq_restore(RAW_IRQ_DISABLED); + flags = arch_local_save_flags(); + arch_local_irq_restore(ARCH_IRQ_DISABLED); return flags; } #endif /* test flags */ -#ifndef raw_irqs_disabled_flags -static inline int raw_irqs_disabled_flags(unsigned long flags) +#ifndef arch_irqs_disabled_flags +static inline int arch_irqs_disabled_flags(unsigned long flags) { - return flags == RAW_IRQ_DISABLED; + return flags == ARCH_IRQ_DISABLED; } #endif /* unconditionally enable interrupts */ -#ifndef raw_local_irq_enable -static inline void raw_local_irq_enable(void) +#ifndef arch_local_irq_enable +static inline void arch_local_irq_enable(void) { - raw_local_irq_restore(RAW_IRQ_ENABLED); + arch_local_irq_restore(ARCH_IRQ_ENABLED); } #endif /* unconditionally disable interrupts */ -#ifndef raw_local_irq_disable -static inline void raw_local_irq_disable(void) +#ifndef arch_local_irq_disable +static inline void arch_local_irq_disable(void) { - raw_local_irq_restore(RAW_IRQ_DISABLED); + arch_local_irq_restore(ARCH_IRQ_DISABLED); } #endif /* test hardware interrupt 
enable bit */ -#ifndef raw_irqs_disabled -static inline int raw_irqs_disabled(void) +#ifndef arch_irqs_disabled +static inline int arch_irqs_disabled(void) { - return raw_irqs_disabled_flags(__raw_local_save_flags()); + return arch_irqs_disabled_flags(arch_local_save_flags()); } #endif -#define raw_local_save_flags(flags) \ - do { (flags) = __raw_local_save_flags(); } while (0) - -#define raw_local_irq_save(flags) \ - do { (flags) = __raw_local_irq_save(); } while (0) - #endif /* __ASM_GENERIC_IRQFLAGS_H */ diff --git a/include/asm-generic/kdebug.h b/include/asm-generic/kdebug.h index 11e57b6a85fc..d1814497bcdb 100644 --- a/include/asm-generic/kdebug.h +++ b/include/asm-generic/kdebug.h @@ -3,7 +3,7 @@ enum die_val { DIE_UNUSED, - DIE_OOPS=1 + DIE_OOPS = 1, }; #endif /* _ASM_GENERIC_KDEBUG_H */ diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 08923b684768..d17784ea37ff 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -55,14 +55,18 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; */ #define per_cpu(var, cpu) \ (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) -#define __get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(&(var), my_cpu_offset)) -#define __raw_get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset)) -#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) +#ifndef __this_cpu_ptr #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) +#endif +#ifdef CONFIG_DEBUG_PREEMPT +#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) +#else +#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr) +#endif +#define __get_cpu_var(var) (*this_cpu_ptr(&(var))) +#define __raw_get_cpu_var(var) (*__this_cpu_ptr(&(var))) #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void setup_per_cpu_areas(void); diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index e2bd73e8f9c0..f4d4120e5128 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -129,6 
+129,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres #define move_pte(pte, prot, old_addr, new_addr) (pte) #endif +#ifndef flush_tlb_fix_spurious_fault +#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address) +#endif + #ifndef pgprot_noncached #define pgprot_noncached(prot) (prot) #endif diff --git a/include/asm-generic/system.h b/include/asm-generic/system.h index efa403b5e121..4b0b9cbbfae5 100644 --- a/include/asm-generic/system.h +++ b/include/asm-generic/system.h @@ -21,6 +21,7 @@ #include <linux/irqflags.h> #include <asm/cmpxchg-local.h> +#include <asm/cmpxchg.h> struct task_struct; @@ -136,25 +137,6 @@ unsigned long __xchg(unsigned long x, volatile void *ptr, int size) #define xchg(ptr, x) \ ((__typeof__(*(ptr))) __xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) -static inline unsigned long __cmpxchg(volatile unsigned long *m, - unsigned long old, unsigned long new) -{ - unsigned long retval; - unsigned long flags; - - local_irq_save(flags); - retval = *m; - if (retval == old) - *m = new; - local_irq_restore(flags); - return retval; -} - -#define cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr))) __cmpxchg((unsigned long *)(ptr), \ - (unsigned long)(o), \ - (unsigned long)(n))) - #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8a92a170fb7d..f4229fb315e1 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -220,6 +220,8 @@ \ BUG_TABLE \ \ + JUMP_TABLE \ + \ /* PCI quirks */ \ .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \ @@ -563,6 +565,14 @@ #define BUG_TABLE #endif +#define JUMP_TABLE \ + . = ALIGN(8); \ + __jump_table : AT(ADDR(__jump_table) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start___jump_table) = .; \ + *(__jump_table) \ + VMLINUX_SYMBOL(__stop___jump_table) = .; \ + } + #ifdef CONFIG_PM_TRACE #define TRACEDATA \ . 
= ALIGN(4); \ @@ -677,7 +687,9 @@ - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ *(.data..percpu..first) \ + . = ALIGN(PAGE_SIZE); \ *(.data..percpu..page_aligned) \ + *(.data..percpu..readmostly) \ *(.data..percpu) \ *(.data..percpu..shared_aligned) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ @@ -703,7 +715,9 @@ VMLINUX_SYMBOL(__per_cpu_load) = .; \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ *(.data..percpu..first) \ + . = ALIGN(PAGE_SIZE); \ *(.data..percpu..page_aligned) \ + *(.data..percpu..readmostly) \ *(.data..percpu) \ *(.data..percpu..shared_aligned) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 7809d230adee..4c9461a4f9e6 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -612,7 +612,7 @@ struct drm_gem_object { struct kref refcount; /** Handle count of this object. Each handle also holds a reference */ - struct kref handlecount; + atomic_t handle_count; /* number of handles on this object */ /** Related drm device */ struct drm_device *dev; @@ -808,7 +808,6 @@ struct drm_driver { */ int (*gem_init_object) (struct drm_gem_object *obj); void (*gem_free_object) (struct drm_gem_object *obj); - void (*gem_free_object_unlocked) (struct drm_gem_object *obj); /* vga arb irq handler */ void (*vgaarb_irq)(struct drm_device *dev, bool state); @@ -1175,6 +1174,7 @@ extern int drm_release(struct inode *inode, struct file *filp); extern int drm_mmap(struct file *filp, struct vm_area_struct *vma); extern int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma); extern void drm_vm_open_locked(struct vm_area_struct *vma); +extern void drm_vm_close_locked(struct vm_area_struct *vma); extern resource_size_t drm_core_get_map_ofs(struct drm_local_map * map); extern resource_size_t drm_core_get_reg_ofs(struct drm_device *dev); extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait); @@ -1455,12 +1455,11 @@ int drm_gem_init(struct drm_device *dev); void drm_gem_destroy(struct 
drm_device *dev); void drm_gem_object_release(struct drm_gem_object *obj); void drm_gem_object_free(struct kref *kref); -void drm_gem_object_free_unlocked(struct kref *kref); struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev, size_t size); int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size); -void drm_gem_object_handle_free(struct kref *kref); +void drm_gem_object_handle_free(struct drm_gem_object *obj); void drm_gem_vm_open(struct vm_area_struct *vma); void drm_gem_vm_close(struct vm_area_struct *vma); int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); @@ -1483,8 +1482,12 @@ drm_gem_object_unreference(struct drm_gem_object *obj) static inline void drm_gem_object_unreference_unlocked(struct drm_gem_object *obj) { - if (obj != NULL) - kref_put(&obj->refcount, drm_gem_object_free_unlocked); + if (obj != NULL) { + struct drm_device *dev = obj->dev; + mutex_lock(&dev->struct_mutex); + kref_put(&obj->refcount, drm_gem_object_free); + mutex_unlock(&dev->struct_mutex); + } } int drm_gem_handle_create(struct drm_file *file_priv, @@ -1495,7 +1498,7 @@ static inline void drm_gem_object_handle_reference(struct drm_gem_object *obj) { drm_gem_object_reference(obj); - kref_get(&obj->handlecount); + atomic_inc(&obj->handle_count); } static inline void @@ -1504,12 +1507,15 @@ drm_gem_object_handle_unreference(struct drm_gem_object *obj) if (obj == NULL) return; + if (atomic_read(&obj->handle_count) == 0) + return; /* * Must bump handle count first as this may be the last * ref, in which case the object would disappear before we * checked for a name */ - kref_put(&obj->handlecount, drm_gem_object_handle_free); + if (atomic_dec_and_test(&obj->handle_count)) + drm_gem_object_handle_free(obj); drm_gem_object_unreference(obj); } @@ -1519,12 +1525,17 @@ drm_gem_object_handle_unreference_unlocked(struct drm_gem_object *obj) if (obj == NULL) return; + if (atomic_read(&obj->handle_count) == 0) + return; + /* * Must bump 
handle count first as this may be the last * ref, in which case the object would disappear before we * checked for a name */ - kref_put(&obj->handlecount, drm_gem_object_handle_free); + + if (atomic_dec_and_test(&obj->handle_count)) + drm_gem_object_handle_free(obj); drm_gem_object_unreference_unlocked(obj); } diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 3a9940ef728b..883c1d439899 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -85,7 +85,6 @@ {0x1002, 0x5460, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ {0x1002, 0x5462, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ {0x1002, 0x5464, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ - {0x1002, 0x5657, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5548, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5549, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \ {0x1002, 0x554A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \ @@ -103,6 +102,7 @@ {0x1002, 0x564F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5652, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5653, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x5657, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5834, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|RADEON_IS_IGP}, \ {0x1002, 0x5835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|RADEON_IS_IGP|RADEON_IS_MOBILITY}, \ {0x1002, 0x5954, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS480|RADEON_IS_IGP|RADEON_IS_MOBILITY|RADEON_IS_IGPGART}, \ diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 267a86c74e2e..2040e6c4f172 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -246,9 +246,11 @@ struct ttm_buffer_object { atomic_t reserved; - /** * 
Members protected by the bo::lock + * In addition, setting sync_obj to anything else + * than NULL requires bo::reserved to be held. This allows for + * checking NULL while reserved but not holding bo::lock. */ void *sync_obj_arg; diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 626b629429ff..831c4634162c 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -118,7 +118,6 @@ header-y += eventpoll.h header-y += ext2_fs.h header-y += fadvise.h header-y += falloc.h -header-y += fanotify.h header-y += fb.h header-y += fcntl.h header-y += fd.h @@ -302,6 +301,7 @@ header-y += quota.h header-y += radeonfb.h header-y += random.h header-y += raw.h +header-y += rds.h header-y += reboot.h header-y += reiserfs_fs.h header-y += reiserfs_xattr.h diff --git a/include/linux/acpi_pmtmr.h b/include/linux/acpi_pmtmr.h index 7e3d2859be50..1d0ef1ae8036 100644 --- a/include/linux/acpi_pmtmr.h +++ b/include/linux/acpi_pmtmr.h @@ -25,8 +25,6 @@ static inline u32 acpi_pm_read_early(void) return acpi_pm_read_verified() & ACPI_PM_MASK; } -extern void pmtimer_wait(unsigned); - #else static inline u32 acpi_pm_read_early(void) diff --git a/include/linux/altera_uart.h b/include/linux/altera_uart.h index 8d441064a30d..a10a90791976 100644 --- a/include/linux/altera_uart.h +++ b/include/linux/altera_uart.h @@ -5,10 +5,15 @@ #ifndef __ALTUART_H #define __ALTUART_H +#include <linux/init.h> + struct altera_uart_platform_uart { unsigned long mapbase; /* Physical address base */ unsigned int irq; /* Interrupt vector */ unsigned int uartclk; /* UART clock rate */ + unsigned int bus_shift; /* Bus shift (address stride) */ }; +int __init early_altera_uart_setup(struct altera_uart_platform_uart *platp); + #endif /* __ALTUART_H */ diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index b0c174012436..c6454cca0447 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -20,6 +20,7 @@ #include <linux/resource.h> #define AMBA_NR_IRQS 2 +#define AMBA_CID 
0xb105f00d struct clk; @@ -70,9 +71,15 @@ void amba_release_regions(struct amba_device *); #define amba_pclk_disable(d) \ do { if (!IS_ERR((d)->pclk)) clk_disable((d)->pclk); } while (0) -#define amba_config(d) (((d)->periphid >> 24) & 0xff) -#define amba_rev(d) (((d)->periphid >> 20) & 0x0f) -#define amba_manf(d) (((d)->periphid >> 12) & 0xff) -#define amba_part(d) ((d)->periphid & 0xfff) +/* Some drivers don't use the struct amba_device */ +#define AMBA_CONFIG_BITS(a) (((a) >> 24) & 0xff) +#define AMBA_REV_BITS(a) (((a) >> 20) & 0x0f) +#define AMBA_MANF_BITS(a) (((a) >> 12) & 0xff) +#define AMBA_PART_BITS(a) ((a) & 0xfff) + +#define amba_config(d) AMBA_CONFIG_BITS((d)->periphid) +#define amba_rev(d) AMBA_REV_BITS((d)->periphid) +#define amba_manf(d) AMBA_MANF_BITS((d)->periphid) +#define amba_part(d) AMBA_PART_BITS((d)->periphid) #endif diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h index ca84ce70d5d5..f4ee9acc9721 100644 --- a/include/linux/amba/mmci.h +++ b/include/linux/amba/mmci.h @@ -24,6 +24,7 @@ * whether a card is present in the MMC slot or not * @gpio_wp: read this GPIO pin to see if the card is write protected * @gpio_cd: read this GPIO pin to detect card insertion + * @cd_invert: true if the gpio_cd pin value is active low * @capabilities: the capabilities of the block as implemented in * this platform, signify anything MMC_CAP_* from mmc/host.h */ @@ -35,6 +36,7 @@ struct mmci_platform_data { unsigned int (*status)(struct device *); int gpio_wp; int gpio_cd; + bool cd_invert; unsigned long capabilities; }; diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h index abf26cc47a2b..4ce98f54186b 100644 --- a/include/linux/amba/pl022.h +++ b/include/linux/amba/pl022.h @@ -228,6 +228,7 @@ enum ssp_chip_select { }; +struct dma_chan; /** * struct pl022_ssp_master - device.platform_data for SPI controller devices. 
* @num_chipselect: chipselects are used to distinguish individual @@ -235,11 +236,16 @@ enum ssp_chip_select { * each slave has a chipselect signal, but it's common that not * every chipselect is connected to a slave. * @enable_dma: if true enables DMA driven transfers. + * @dma_rx_param: parameter to locate an RX DMA channel. + * @dma_tx_param: parameter to locate a TX DMA channel. */ struct pl022_ssp_controller { u16 bus_id; u8 num_chipselect; u8 enable_dma:1; + bool (*dma_filter)(struct dma_chan *chan, void *filter_param); + void *dma_rx_param; + void *dma_tx_param; }; /** @@ -270,20 +276,13 @@ struct pl022_ssp_controller { * @dma_config: DMA configuration for SSP controller and peripheral */ struct pl022_config_chip { - struct device *dev; - enum ssp_loopback lbm; enum ssp_interface iface; enum ssp_hierarchy hierarchy; bool slave_tx_disable; struct ssp_clock_params clk_freq; - enum ssp_rx_endian endian_rx; - enum ssp_tx_endian endian_tx; - enum ssp_data_size data_size; enum ssp_mode com_mode; enum ssp_rx_level_trig rx_lev_trig; enum ssp_tx_level_trig tx_lev_trig; - enum ssp_spi_clk_phase clk_phase; - enum ssp_spi_clk_pol clk_pol; enum ssp_microwire_ctrl_len ctrl_len; enum ssp_microwire_wait_state wait_state; enum ssp_duplex duplex; diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index e1b634b635f2..6021588ba0a8 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -32,7 +32,9 @@ #define UART01x_RSR 0x04 /* Receive status register (Read). */ #define UART01x_ECR 0x04 /* Error clear register (Write). */ #define UART010_LCRH 0x08 /* Line control register, high byte. */ +#define ST_UART011_DMAWM 0x08 /* DMA watermark configure register. */ #define UART010_LCRM 0x0C /* Line control register, middle byte. */ +#define ST_UART011_TIMEOUT 0x0C /* Timeout period register. */ #define UART010_LCRL 0x10 /* Line control register, low byte. */ #define UART010_CR 0x14 /* Control register. 
*/ #define UART01x_FR 0x18 /* Flag register (Read only). */ @@ -51,6 +53,15 @@ #define UART011_MIS 0x40 /* Masked interrupt status. */ #define UART011_ICR 0x44 /* Interrupt clear register. */ #define UART011_DMACR 0x48 /* DMA control register. */ +#define ST_UART011_XFCR 0x50 /* XON/XOFF control register. */ +#define ST_UART011_XON1 0x54 /* XON1 register. */ +#define ST_UART011_XON2 0x58 /* XON2 register. */ +#define ST_UART011_XOFF1 0x5C /* XON1 register. */ +#define ST_UART011_XOFF2 0x60 /* XON2 register. */ +#define ST_UART011_ITCR 0x80 /* Integration test control register. */ +#define ST_UART011_ITIP 0x84 /* Integration test input register. */ +#define ST_UART011_ABCR 0x100 /* Autobaud control register. */ +#define ST_UART011_ABIMSC 0x15C /* Autobaud interrupt mask/clear register. */ #define UART011_DR_OE (1 << 11) #define UART011_DR_BE (1 << 10) diff --git a/include/linux/ata.h b/include/linux/ata.h index fe6e681a9d74..0c4929fa34d3 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -89,6 +89,7 @@ enum { ATA_ID_SPG = 98, ATA_ID_LBA_CAPACITY_2 = 100, ATA_ID_SECTOR_SIZE = 106, + ATA_ID_LOGICAL_SECTOR_SIZE = 117, /* and 118 */ ATA_ID_LAST_LUN = 126, ATA_ID_DLF = 128, ATA_ID_CSFO = 129, @@ -640,16 +641,49 @@ static inline int ata_id_flush_ext_enabled(const u16 *id) return (id[ATA_ID_CFS_ENABLE_2] & 0x2400) == 0x2400; } -static inline int ata_id_has_large_logical_sectors(const u16 *id) +static inline u32 ata_id_logical_sector_size(const u16 *id) { - if ((id[ATA_ID_SECTOR_SIZE] & 0xc000) != 0x4000) - return 0; - return id[ATA_ID_SECTOR_SIZE] & (1 << 13); + /* T13/1699-D Revision 6a, Sep 6, 2008. Page 128. + * IDENTIFY DEVICE data, word 117-118. 
+ * 0xd000 ignores bit 13 (logical:physical > 1) + */ + if ((id[ATA_ID_SECTOR_SIZE] & 0xd000) == 0x5000) + return (((id[ATA_ID_LOGICAL_SECTOR_SIZE+1] << 16) + + id[ATA_ID_LOGICAL_SECTOR_SIZE]) * sizeof(u16)) ; + return ATA_SECT_SIZE; +} + +static inline u8 ata_id_log2_per_physical_sector(const u16 *id) +{ + /* T13/1699-D Revision 6a, Sep 6, 2008. Page 128. + * IDENTIFY DEVICE data, word 106. + * 0xe000 ignores bit 12 (logical sector > 512 bytes) + */ + if ((id[ATA_ID_SECTOR_SIZE] & 0xe000) == 0x6000) + return (id[ATA_ID_SECTOR_SIZE] & 0xf); + return 0; } -static inline u16 ata_id_logical_per_physical_sectors(const u16 *id) +/* Offset of logical sectors relative to physical sectors. + * + * If device has more than one logical sector per physical sector + * (aka 512 byte emulation), vendors might offset the "sector 0" address + * so sector 63 is "naturally aligned" - e.g. FAT partition table. + * This avoids Read/Mod/Write penalties when using FAT partition table + * and updating "well aligned" (FS perspective) physical sectors on every + * transaction. 
+ */ +static inline u16 ata_id_logical_sector_offset(const u16 *id, + u8 log2_per_phys) { - return 1 << (id[ATA_ID_SECTOR_SIZE] & 0xf); + u16 word_209 = id[209]; + + if ((log2_per_phys > 1) && (word_209 & 0xc000) == 0x4000) { + u16 first = word_209 & 0x3fff; + if (first > 0) + return (1 << log2_per_phys) - first; + } + return 0; } static inline int ata_id_has_lba48(const u16 *id) diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index f6481daf6e52..a8e4e832cdbb 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -449,7 +449,7 @@ void vcc_insert_socket(struct sock *sk); static inline int atm_guess_pdu2truesize(int size) { - return (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info)); + return SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info); } diff --git a/include/linux/bio.h b/include/linux/bio.h index 5274103434ad..ba679992d39b 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -346,8 +346,15 @@ static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) } #else -#define bvec_kmap_irq(bvec, flags) (page_address((bvec)->bv_page) + (bvec)->bv_offset) -#define bvec_kunmap_irq(buf, flags) do { *(flags) = 0; } while (0) +static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) +{ + return page_address(bvec->bv_page) + bvec->bv_offset; +} + +static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) +{ + *flags = 0; +} #endif static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, @@ -496,6 +503,10 @@ static inline struct bio *bio_list_get(struct bio_list *bl) #define bip_for_each_vec(bvl, bip, i) \ __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) +#define bio_for_each_integrity_vec(_bvl, _bio, _iter) \ + for_each_bio(_bio) \ + bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) + #define bio_integrity(bio) (bio->bi_integrity != NULL) extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); diff --git 
a/include/linux/bitops.h b/include/linux/bitops.h index fc68053378ce..827cc95711ef 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -136,28 +136,6 @@ static inline unsigned long __ffs64(u64 word) } #ifdef __KERNEL__ -#ifdef CONFIG_GENERIC_FIND_FIRST_BIT - -/** - * find_first_bit - find the first set bit in a memory region - * @addr: The address to start the search at - * @size: The maximum size to search - * - * Returns the bit number of the first set bit. - */ -extern unsigned long find_first_bit(const unsigned long *addr, - unsigned long size); - -/** - * find_first_zero_bit - find the first cleared bit in a memory region - * @addr: The address to start the search at - * @size: The maximum size to search - * - * Returns the bit number of the first cleared bit. - */ -extern unsigned long find_first_zero_bit(const unsigned long *addr, - unsigned long size); -#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ #ifdef CONFIG_GENERIC_FIND_LAST_BIT /** @@ -171,28 +149,5 @@ extern unsigned long find_last_bit(const unsigned long *addr, unsigned long size); #endif /* CONFIG_GENERIC_FIND_LAST_BIT */ -#ifdef CONFIG_GENERIC_FIND_NEXT_BIT - -/** - * find_next_bit - find the next set bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The bitmap size in bits - */ -extern unsigned long find_next_bit(const unsigned long *addr, - unsigned long size, unsigned long offset); - -/** - * find_next_zero_bit - find the next cleared bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The bitmap size in bits - */ - -extern unsigned long find_next_zero_bit(const unsigned long *addr, - unsigned long size, - unsigned long offset); - -#endif /* CONFIG_GENERIC_FIND_NEXT_BIT */ #endif /* __KERNEL__ */ #endif diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index ca83a97c9715..0437ab6bb54c 100644 --- 
a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -97,6 +97,7 @@ struct bio { #define BIO_NULL_MAPPED 9 /* contains invalid user pages */ #define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */ #define BIO_QUIET 11 /* Make BIO Quiet */ +#define BIO_MAPPED_INTEGRITY 12/* integrity metadata has been remapped */ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) /* @@ -130,6 +131,8 @@ enum rq_flag_bits { /* bio only flags */ __REQ_UNPLUG, /* unplug the immediately after submission */ __REQ_RAHEAD, /* read ahead, can fail anytime */ + __REQ_THROTTLED, /* This bio has already been subjected to + * throttling rules. Don't do it again. */ /* request only flags */ __REQ_SORTED, /* elevator knows about this request */ @@ -143,10 +146,8 @@ enum rq_flag_bits { __REQ_FAILED, /* set if the request failed */ __REQ_QUIET, /* don't worry about errors */ __REQ_PREEMPT, /* set for "ide_preempt" requests */ - __REQ_ORDERED_COLOR, /* is before or after barrier */ __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_COPY_USER, /* contains copies of user pages */ - __REQ_INTEGRITY, /* integrity metadata has been remapped */ __REQ_FLUSH, /* request for cache flush */ __REQ_IO_STAT, /* account I/O stat */ __REQ_MIXED_MERGE, /* merge of different types, fail separately */ @@ -168,10 +169,12 @@ enum rq_flag_bits { (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ - REQ_META| REQ_DISCARD | REQ_NOIDLE) + REQ_META | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) +#define REQ_CLONE_MASK REQ_COMMON_MASK #define REQ_UNPLUG (1 << __REQ_UNPLUG) #define REQ_RAHEAD (1 << __REQ_RAHEAD) +#define REQ_THROTTLED (1 << __REQ_THROTTLED) #define REQ_SORTED (1 << __REQ_SORTED) #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) @@ -184,10 +187,8 @@ enum rq_flag_bits { #define REQ_FAILED (1 << __REQ_FAILED) #define REQ_QUIET (1 << __REQ_QUIET) #define 
REQ_PREEMPT (1 << __REQ_PREEMPT) -#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) #define REQ_ALLOCED (1 << __REQ_ALLOCED) #define REQ_COPY_USER (1 << __REQ_COPY_USER) -#define REQ_INTEGRITY (1 << __REQ_INTEGRITY) #define REQ_FLUSH (1 << __REQ_FLUSH) #define REQ_IO_STAT (1 << __REQ_IO_STAT) #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c54906f678f..009b80e49f53 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -115,6 +115,7 @@ struct request { void *elevator_private3; struct gendisk *rq_disk; + struct hd_struct *part; unsigned long start_time; #ifdef CONFIG_BLK_CGROUP unsigned long long start_time_ns; @@ -124,6 +125,9 @@ struct request { * physical address coalescing is performed. */ unsigned short nr_phys_segments; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + unsigned short nr_integrity_segments; +#endif unsigned short ioprio; @@ -243,6 +247,7 @@ struct queue_limits { unsigned short logical_block_size; unsigned short max_segments; + unsigned short max_integrity_segments; unsigned char misaligned; unsigned char discard_misaligned; @@ -355,18 +360,25 @@ struct request_queue struct blk_trace *blk_trace; #endif /* - * reserved for flush operations + * for flush operations */ - unsigned int ordered, next_ordered, ordseq; - int orderr, ordcolor; - struct request pre_flush_rq, bar_rq, post_flush_rq; - struct request *orig_bar_rq; + unsigned int flush_flags; + unsigned int flush_seq; + int flush_err; + struct request flush_rq; + struct request *orig_flush_rq; + struct list_head pending_flushes; struct mutex sysfs_lock; #if defined(CONFIG_BLK_DEV_BSG) struct bsg_class_device bsg_dev; #endif + +#ifdef CONFIG_BLK_DEV_THROTTLING + /* Throttle data */ + struct throtl_data *td; +#endif }; #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ @@ -462,56 +474,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) __clear_bit(flag, &q->queue_flags); 
} -enum { - /* - * Hardbarrier is supported with one of the following methods. - * - * NONE : hardbarrier unsupported - * DRAIN : ordering by draining is enough - * DRAIN_FLUSH : ordering by draining w/ pre and post flushes - * DRAIN_FUA : ordering by draining w/ pre flush and FUA write - * TAG : ordering by tag is enough - * TAG_FLUSH : ordering by tag w/ pre and post flushes - * TAG_FUA : ordering by tag w/ pre flush and FUA write - */ - QUEUE_ORDERED_BY_DRAIN = 0x01, - QUEUE_ORDERED_BY_TAG = 0x02, - QUEUE_ORDERED_DO_PREFLUSH = 0x10, - QUEUE_ORDERED_DO_BAR = 0x20, - QUEUE_ORDERED_DO_POSTFLUSH = 0x40, - QUEUE_ORDERED_DO_FUA = 0x80, - - QUEUE_ORDERED_NONE = 0x00, - - QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN | - QUEUE_ORDERED_DO_BAR, - QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_POSTFLUSH, - QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_FUA, - - QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG | - QUEUE_ORDERED_DO_BAR, - QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_POSTFLUSH, - QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_FUA, - - /* - * Ordered operation sequence - */ - QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ - QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ - QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ - QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ - QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ - QUEUE_ORDSEQ_DONE = 0x20, -}; - #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) @@ -521,7 +483,6 @@ enum { #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) #define 
blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) -#define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) @@ -592,7 +553,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync) * it already be started by driver. */ #define RQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) + (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER | \ + REQ_FLUSH | REQ_FUA) #define rq_mergeable(rq) \ (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ (((rq)->cmd_flags & REQ_DISCARD) || \ @@ -851,7 +813,7 @@ extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); -extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); +extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); extern void blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); @@ -881,12 +843,8 @@ extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); +extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern int blk_queue_ordered(struct request_queue *, unsigned); -extern bool blk_do_ordered(struct request_queue *, 
struct request **); -extern unsigned blk_ordered_cur_seq(struct request_queue *); -extern unsigned blk_ordered_req_seq(struct request *); -extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); @@ -919,27 +877,20 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, return NULL; return bqt->tag_index[tag]; } -enum{ - BLKDEV_WAIT, /* wait for completion */ - BLKDEV_BARRIER, /* issue request with barrier */ - BLKDEV_SECURE, /* secure discard */ -}; -#define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) -#define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER) -#define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE) -extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, - unsigned long); + +#define BLKDEV_DISCARD_SECURE 0x01 /* secure discard */ + +extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); -static inline int sb_issue_discard(struct super_block *sb, - sector_t block, sector_t nr_blocks) + sector_t nr_sects, gfp_t gfp_mask); +static inline int sb_issue_discard(struct super_block *sb, sector_t block, + sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) { - block <<= (sb->s_blocksize_bits - 9); - nr_blocks <<= (sb->s_blocksize_bits - 9); - return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS, - BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); + return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9), + nr_blocks << (sb->s_blocksize_bits - 9), + gfp_mask, flags); } extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); @@ -1004,7 +955,7 @@ static 
inline unsigned int queue_physical_block_size(struct request_queue *q) return q->limits.physical_block_size; } -static inline int bdev_physical_block_size(struct block_device *bdev) +static inline unsigned int bdev_physical_block_size(struct block_device *bdev) { return queue_physical_block_size(bdev_get_queue(bdev)); } @@ -1093,11 +1044,11 @@ static inline int queue_dma_alignment(struct request_queue *q) return q ? q->dma_alignment : 511; } -static inline int blk_rq_aligned(struct request_queue *q, void *addr, +static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, unsigned int len) { unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; - return !((unsigned long)addr & alignment) && !(len & alignment); + return !(addr & alignment) && !(len & alignment); } /* assumes size > 256 */ @@ -1127,6 +1078,7 @@ static inline void put_dev_sector(Sector p) struct work_struct; int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); +int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay); #ifdef CONFIG_BLK_CGROUP /* @@ -1170,6 +1122,24 @@ static inline uint64_t rq_io_start_time_ns(struct request *req) } #endif +#ifdef CONFIG_BLK_DEV_THROTTLING +extern int blk_throtl_init(struct request_queue *q); +extern void blk_throtl_exit(struct request_queue *q); +extern int blk_throtl_bio(struct request_queue *q, struct bio **bio); +extern void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay); +extern void throtl_shutdown_timer_wq(struct request_queue *q); +#else /* CONFIG_BLK_DEV_THROTTLING */ +static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio) +{ + return 0; +} + +static inline int blk_throtl_init(struct request_queue *q) { return 0; } +static inline int blk_throtl_exit(struct request_queue *q) { return 0; } +static inline void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) {} +static inline void 
throtl_shutdown_timer_wq(struct request_queue *q) {} +#endif /* CONFIG_BLK_DEV_THROTTLING */ + #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ @@ -1213,8 +1183,13 @@ struct blk_integrity { extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); extern void blk_integrity_unregister(struct gendisk *); extern int blk_integrity_compare(struct gendisk *, struct gendisk *); -extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); -extern int blk_rq_count_integrity_sg(struct request *); +extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, + struct scatterlist *); +extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); +extern int blk_integrity_merge_rq(struct request_queue *, struct request *, + struct request *); +extern int blk_integrity_merge_bio(struct request_queue *, struct request *, + struct bio *); static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev) @@ -1235,16 +1210,32 @@ static inline int blk_integrity_rq(struct request *rq) return bio_integrity(rq->bio); } +static inline void blk_queue_max_integrity_segments(struct request_queue *q, + unsigned int segs) +{ + q->limits.max_integrity_segments = segs; +} + +static inline unsigned short +queue_max_integrity_segments(struct request_queue *q) +{ + return q->limits.max_integrity_segments; +} + #else /* CONFIG_BLK_DEV_INTEGRITY */ #define blk_integrity_rq(rq) (0) -#define blk_rq_count_integrity_sg(a) (0) -#define blk_rq_map_integrity_sg(a, b) (0) +#define blk_rq_count_integrity_sg(a, b) (0) +#define blk_rq_map_integrity_sg(a, b, c) (0) #define bdev_get_integrity(a) (0) #define blk_get_integrity(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) #define blk_integrity_unregister(a) do { } while (0); +#define blk_queue_max_integrity_segments(a, b) do { } while 
(0); +#define queue_max_integrity_segments(a) (0) +#define blk_integrity_merge_rq(a, b, c) (0) +#define blk_integrity_merge_bio(a, b, c) (0) #endif /* CONFIG_BLK_DEV_INTEGRITY */ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index ec94c12f21da..dd1b25b2641c 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,6 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Eopnotsupp, /* operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ @@ -124,7 +123,6 @@ BUFFER_FNS(Async_Write, async_write) BUFFER_FNS(Delay, delay) BUFFER_FNS(Boundary, boundary) BUFFER_FNS(Write_EIO, write_io_error) -BUFFER_FNS(Eopnotsupp, eopnotsupp) BUFFER_FNS(Unwritten, unwritten) #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) diff --git a/include/linux/can/platform/mcp251x.h b/include/linux/can/platform/mcp251x.h index dba28268e651..8e20540043f5 100644 --- a/include/linux/can/platform/mcp251x.h +++ b/include/linux/can/platform/mcp251x.h @@ -12,7 +12,6 @@ /** * struct mcp251x_platform_data - MCP251X SPI CAN controller platform data * @oscillator_frequency: - oscillator frequency in Hz - * @model: - actual type of chip * @board_specific_setup: - called before probing the chip (power,reset) * @transceiver_enable: - called to power on/off the transceiver * @power_enable: - called to power on/off the mcp *and* the @@ -25,9 +24,6 @@ struct mcp251x_platform_data { unsigned long oscillator_frequency; - int model; -#define CAN_MCP251X_MCP2510 0x2510 -#define CAN_MCP251X_MCP2515 0x2515 int (*board_specific_setup)(struct spi_device *spi); int (*transceiver_enable)(int enable); int (*power_enable) (int enable); diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h new file mode 100644 index 
000000000000..7fff521d7eb5 --- /dev/null +++ b/include/linux/ceph/auth.h @@ -0,0 +1,92 @@ +#ifndef _FS_CEPH_AUTH_H +#define _FS_CEPH_AUTH_H + +#include <linux/ceph/types.h> +#include <linux/ceph/buffer.h> + +/* + * Abstract interface for communicating with the authenticate module. + * There is some handshake that takes place between us and the monitor + * to acquire the necessary keys. These are used to generate an + * 'authorizer' that we use when connecting to a service (mds, osd). + */ + +struct ceph_auth_client; +struct ceph_authorizer; + +struct ceph_auth_client_ops { + const char *name; + + /* + * true if we are authenticated and can connect to + * services. + */ + int (*is_authenticated)(struct ceph_auth_client *ac); + + /* + * true if we should (re)authenticate, e.g., when our tickets + * are getting old and crusty. + */ + int (*should_authenticate)(struct ceph_auth_client *ac); + + /* + * build requests and process replies during monitor + * handshake. if handle_reply returns -EAGAIN, we build + * another request. + */ + int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end); + int (*handle_reply)(struct ceph_auth_client *ac, int result, + void *buf, void *end); + + /* + * Create authorizer for connecting to a service, and verify + * the response to authenticate the service. 
+ */ + int (*create_authorizer)(struct ceph_auth_client *ac, int peer_type, + struct ceph_authorizer **a, + void **buf, size_t *len, + void **reply_buf, size_t *reply_len); + int (*verify_authorizer_reply)(struct ceph_auth_client *ac, + struct ceph_authorizer *a, size_t len); + void (*destroy_authorizer)(struct ceph_auth_client *ac, + struct ceph_authorizer *a); + void (*invalidate_authorizer)(struct ceph_auth_client *ac, + int peer_type); + + /* reset when we (re)connect to a monitor */ + void (*reset)(struct ceph_auth_client *ac); + + void (*destroy)(struct ceph_auth_client *ac); +}; + +struct ceph_auth_client { + u32 protocol; /* CEPH_AUTH_* */ + void *private; /* for use by protocol implementation */ + const struct ceph_auth_client_ops *ops; /* null iff protocol==0 */ + + bool negotiating; /* true if negotiating protocol */ + const char *name; /* entity name */ + u64 global_id; /* our unique id in system */ + const char *secret; /* our secret key */ + unsigned want_keys; /* which services we want */ +}; + +extern struct ceph_auth_client *ceph_auth_init(const char *name, + const char *secret); +extern void ceph_auth_destroy(struct ceph_auth_client *ac); + +extern void ceph_auth_reset(struct ceph_auth_client *ac); + +extern int ceph_auth_build_hello(struct ceph_auth_client *ac, + void *buf, size_t len); +extern int ceph_handle_auth_reply(struct ceph_auth_client *ac, + void *buf, size_t len, + void *reply_buf, size_t reply_len); +extern int ceph_entity_name_encode(const char *name, void **p, void *end); + +extern int ceph_build_auth(struct ceph_auth_client *ac, + void *msg_buf, size_t msg_len); + +extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac); + +#endif diff --git a/include/linux/ceph/buffer.h b/include/linux/ceph/buffer.h new file mode 100644 index 000000000000..58d19014068f --- /dev/null +++ b/include/linux/ceph/buffer.h @@ -0,0 +1,39 @@ +#ifndef __FS_CEPH_BUFFER_H +#define __FS_CEPH_BUFFER_H + +#include <linux/kref.h> +#include <linux/mm.h> 
+#include <linux/vmalloc.h> +#include <linux/types.h> +#include <linux/uio.h> + +/* + * a simple reference counted buffer. + * + * use kmalloc for small sizes (<= one page), vmalloc for larger + * sizes. + */ +struct ceph_buffer { + struct kref kref; + struct kvec vec; + size_t alloc_len; + bool is_vmalloc; +}; + +extern struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp); +extern void ceph_buffer_release(struct kref *kref); + +static inline struct ceph_buffer *ceph_buffer_get(struct ceph_buffer *b) +{ + kref_get(&b->kref); + return b; +} + +static inline void ceph_buffer_put(struct ceph_buffer *b) +{ + kref_put(&b->kref, ceph_buffer_release); +} + +extern int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end); + +#endif diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h new file mode 100644 index 000000000000..aa2e19182d99 --- /dev/null +++ b/include/linux/ceph/ceph_debug.h @@ -0,0 +1,38 @@ +#ifndef _FS_CEPH_DEBUG_H +#define _FS_CEPH_DEBUG_H + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#ifdef CONFIG_CEPH_LIB_PRETTYDEBUG + +/* + * wrap pr_debug to include a filename:lineno prefix on each line. + * this incurs some overhead (kernel size and execution time) due to + * the extra function call at each call site. + */ + +# if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) +extern const char *ceph_file_part(const char *s, int len); +# define dout(fmt, ...) \ + pr_debug("%.*s %12.12s:%-4d : " fmt, \ + 8 - (int)sizeof(KBUILD_MODNAME), " ", \ + ceph_file_part(__FILE__, sizeof(__FILE__)), \ + __LINE__, ##__VA_ARGS__) +# else +/* faux printk call just to see any compiler warnings. */ +# define dout(fmt, ...) do { \ + if (0) \ + printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ + } while (0) +# endif + +#else + +/* + * or, just wrap pr_debug + */ +# define dout(fmt, ...) 
pr_debug(" " fmt, ##__VA_ARGS__) + +#endif + +#endif diff --git a/include/linux/ceph/ceph_frag.h b/include/linux/ceph/ceph_frag.h new file mode 100644 index 000000000000..5babb8e95352 --- /dev/null +++ b/include/linux/ceph/ceph_frag.h @@ -0,0 +1,109 @@ +#ifndef FS_CEPH_FRAG_H +#define FS_CEPH_FRAG_H + +/* + * "Frags" are a way to describe a subset of a 32-bit number space, + * using a mask and a value to match against that mask. Any given frag + * (subset of the number space) can be partitioned into 2^n sub-frags. + * + * Frags are encoded into a 32-bit word: + * 8 upper bits = "bits" + * 24 lower bits = "value" + * (We could go to 5+27 bits, but who cares.) + * + * We use the _most_ significant bits of the 24 bit value. This makes + * values logically sort. + * + * Unfortunately, because the "bits" field is still in the high bits, we + * can't sort encoded frags numerically. However, it does allow you + * to feed encoded frags as values into frag_contains_value. + */ +static inline __u32 ceph_frag_make(__u32 b, __u32 v) +{ + return (b << 24) | + (v & (0xffffffu << (24-b)) & 0xffffffu); +} +static inline __u32 ceph_frag_bits(__u32 f) +{ + return f >> 24; +} +static inline __u32 ceph_frag_value(__u32 f) +{ + return f & 0xffffffu; +} +static inline __u32 ceph_frag_mask(__u32 f) +{ + return (0xffffffu << (24-ceph_frag_bits(f))) & 0xffffffu; +} +static inline __u32 ceph_frag_mask_shift(__u32 f) +{ + return 24 - ceph_frag_bits(f); +} + +static inline int ceph_frag_contains_value(__u32 f, __u32 v) +{ + return (v & ceph_frag_mask(f)) == ceph_frag_value(f); +} +static inline int ceph_frag_contains_frag(__u32 f, __u32 sub) +{ + /* is sub as specific as us, and contained by us? 
*/ + return ceph_frag_bits(sub) >= ceph_frag_bits(f) && + (ceph_frag_value(sub) & ceph_frag_mask(f)) == ceph_frag_value(f); +} + +static inline __u32 ceph_frag_parent(__u32 f) +{ + return ceph_frag_make(ceph_frag_bits(f) - 1, + ceph_frag_value(f) & (ceph_frag_mask(f) << 1)); +} +static inline int ceph_frag_is_left_child(__u32 f) +{ + return ceph_frag_bits(f) > 0 && + (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 0; +} +static inline int ceph_frag_is_right_child(__u32 f) +{ + return ceph_frag_bits(f) > 0 && + (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 1; +} +static inline __u32 ceph_frag_sibling(__u32 f) +{ + return ceph_frag_make(ceph_frag_bits(f), + ceph_frag_value(f) ^ (0x1000000 >> ceph_frag_bits(f))); +} +static inline __u32 ceph_frag_left_child(__u32 f) +{ + return ceph_frag_make(ceph_frag_bits(f)+1, ceph_frag_value(f)); +} +static inline __u32 ceph_frag_right_child(__u32 f) +{ + return ceph_frag_make(ceph_frag_bits(f)+1, + ceph_frag_value(f) | (0x1000000 >> (1+ceph_frag_bits(f)))); +} +static inline __u32 ceph_frag_make_child(__u32 f, int by, int i) +{ + int newbits = ceph_frag_bits(f) + by; + return ceph_frag_make(newbits, + ceph_frag_value(f) | (i << (24 - newbits))); +} +static inline int ceph_frag_is_leftmost(__u32 f) +{ + return ceph_frag_value(f) == 0; +} +static inline int ceph_frag_is_rightmost(__u32 f) +{ + return ceph_frag_value(f) == ceph_frag_mask(f); +} +static inline __u32 ceph_frag_next(__u32 f) +{ + return ceph_frag_make(ceph_frag_bits(f), + ceph_frag_value(f) + (0x1000000 >> ceph_frag_bits(f))); +} + +/* + * comparator to sort frags logically, as when traversing the + * number space in ascending order... 
+ */ +int ceph_frag_compare(__u32 a, __u32 b); + +#endif diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h new file mode 100644 index 000000000000..c3c74aef289d --- /dev/null +++ b/include/linux/ceph/ceph_fs.h @@ -0,0 +1,729 @@ +/* + * ceph_fs.h - Ceph constants and data types to share between kernel and + * user space. + * + * Most types in this file are defined as little-endian, and are + * primarily intended to describe data structures that pass over the + * wire or that are stored on disk. + * + * LGPL2 + */ + +#ifndef CEPH_FS_H +#define CEPH_FS_H + +#include "msgr.h" +#include "rados.h" + +/* + * subprotocol versions. when specific messages types or high-level + * protocols change, bump the affected components. we keep rev + * internal cluster protocols separately from the public, + * client-facing protocol. + */ +#define CEPH_OSD_PROTOCOL 8 /* cluster internal */ +#define CEPH_MDS_PROTOCOL 12 /* cluster internal */ +#define CEPH_MON_PROTOCOL 5 /* cluster internal */ +#define CEPH_OSDC_PROTOCOL 24 /* server/client */ +#define CEPH_MDSC_PROTOCOL 32 /* server/client */ +#define CEPH_MONC_PROTOCOL 15 /* server/client */ + + +#define CEPH_INO_ROOT 1 +#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ + +/* arbitrary limit on max # of monitors (cluster of 3 is typical) */ +#define CEPH_MAX_MON 31 + + +/* + * feature bits + */ +#define CEPH_FEATURE_UID (1<<0) +#define CEPH_FEATURE_NOSRCADDR (1<<1) +#define CEPH_FEATURE_MONCLOCKCHECK (1<<2) +#define CEPH_FEATURE_FLOCK (1<<3) + + +/* + * ceph_file_layout - describe data layout for a file/inode + */ +struct ceph_file_layout { + /* file -> object mapping */ + __le32 fl_stripe_unit; /* stripe unit, in bytes. must be multiple + of page size. 
*/ + __le32 fl_stripe_count; /* over this many objects */ + __le32 fl_object_size; /* until objects are this big, then move to + new objects */ + __le32 fl_cas_hash; /* 0 = none; 1 = sha256 */ + + /* pg -> disk layout */ + __le32 fl_object_stripe_unit; /* for per-object parity, if any */ + + /* object -> pg layout */ + __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */ + __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ +} __attribute__ ((packed)); + +#define CEPH_MIN_STRIPE_UNIT 65536 + +int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); + + +/* crypto algorithms */ +#define CEPH_CRYPTO_NONE 0x0 +#define CEPH_CRYPTO_AES 0x1 + +#define CEPH_AES_IV "cephsageyudagreg" + +/* security/authentication protocols */ +#define CEPH_AUTH_UNKNOWN 0x0 +#define CEPH_AUTH_NONE 0x1 +#define CEPH_AUTH_CEPHX 0x2 + +#define CEPH_AUTH_UID_DEFAULT ((__u64) -1) + + +/********************************************* + * message layer + */ + +/* + * message types + */ + +/* misc */ +#define CEPH_MSG_SHUTDOWN 1 +#define CEPH_MSG_PING 2 + +/* client <-> monitor */ +#define CEPH_MSG_MON_MAP 4 +#define CEPH_MSG_MON_GET_MAP 5 +#define CEPH_MSG_STATFS 13 +#define CEPH_MSG_STATFS_REPLY 14 +#define CEPH_MSG_MON_SUBSCRIBE 15 +#define CEPH_MSG_MON_SUBSCRIBE_ACK 16 +#define CEPH_MSG_AUTH 17 +#define CEPH_MSG_AUTH_REPLY 18 + +/* client <-> mds */ +#define CEPH_MSG_MDS_MAP 21 + +#define CEPH_MSG_CLIENT_SESSION 22 +#define CEPH_MSG_CLIENT_RECONNECT 23 + +#define CEPH_MSG_CLIENT_REQUEST 24 +#define CEPH_MSG_CLIENT_REQUEST_FORWARD 25 +#define CEPH_MSG_CLIENT_REPLY 26 +#define CEPH_MSG_CLIENT_CAPS 0x310 +#define CEPH_MSG_CLIENT_LEASE 0x311 +#define CEPH_MSG_CLIENT_SNAP 0x312 +#define CEPH_MSG_CLIENT_CAPRELEASE 0x313 + +/* pool ops */ +#define CEPH_MSG_POOLOP_REPLY 48 +#define CEPH_MSG_POOLOP 49 + + +/* osd */ +#define CEPH_MSG_OSD_MAP 41 +#define CEPH_MSG_OSD_OP 42 +#define CEPH_MSG_OSD_OPREPLY 43 + +/* pool operations */ +enum { + POOL_OP_CREATE = 0x01, + 
POOL_OP_DELETE = 0x02, + POOL_OP_AUID_CHANGE = 0x03, + POOL_OP_CREATE_SNAP = 0x11, + POOL_OP_DELETE_SNAP = 0x12, + POOL_OP_CREATE_UNMANAGED_SNAP = 0x21, + POOL_OP_DELETE_UNMANAGED_SNAP = 0x22, +}; + +struct ceph_mon_request_header { + __le64 have_version; + __le16 session_mon; + __le64 session_mon_tid; +} __attribute__ ((packed)); + +struct ceph_mon_statfs { + struct ceph_mon_request_header monhdr; + struct ceph_fsid fsid; +} __attribute__ ((packed)); + +struct ceph_statfs { + __le64 kb, kb_used, kb_avail; + __le64 num_objects; +} __attribute__ ((packed)); + +struct ceph_mon_statfs_reply { + struct ceph_fsid fsid; + __le64 version; + struct ceph_statfs st; +} __attribute__ ((packed)); + +const char *ceph_pool_op_name(int op); + +struct ceph_mon_poolop { + struct ceph_mon_request_header monhdr; + struct ceph_fsid fsid; + __le32 pool; + __le32 op; + __le64 auid; + __le64 snapid; + __le32 name_len; +} __attribute__ ((packed)); + +struct ceph_mon_poolop_reply { + struct ceph_mon_request_header monhdr; + struct ceph_fsid fsid; + __le32 reply_code; + __le32 epoch; + char has_data; + char data[0]; +} __attribute__ ((packed)); + +struct ceph_mon_unmanaged_snap { + __le64 snapid; +} __attribute__ ((packed)); + +struct ceph_osd_getmap { + struct ceph_mon_request_header monhdr; + struct ceph_fsid fsid; + __le32 start; +} __attribute__ ((packed)); + +struct ceph_mds_getmap { + struct ceph_mon_request_header monhdr; + struct ceph_fsid fsid; +} __attribute__ ((packed)); + +struct ceph_client_mount { + struct ceph_mon_request_header monhdr; +} __attribute__ ((packed)); + +struct ceph_mon_subscribe_item { + __le64 have_version; __le64 have; + __u8 onetime; +} __attribute__ ((packed)); + +struct ceph_mon_subscribe_ack { + __le32 duration; /* seconds */ + struct ceph_fsid fsid; +} __attribute__ ((packed)); + +/* + * mds states + * > 0 -> in + * <= 0 -> out + */ +#define CEPH_MDS_STATE_DNE 0 /* down, does not exist. 
*/ +#define CEPH_MDS_STATE_STOPPED -1 /* down, once existed, but no subtrees. + empty log. */ +#define CEPH_MDS_STATE_BOOT -4 /* up, boot announcement. */ +#define CEPH_MDS_STATE_STANDBY -5 /* up, idle. waiting for assignment. */ +#define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */ +#define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */ +#define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */ + +#define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */ +#define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed + operations (import, rename, etc.) */ +#define CEPH_MDS_STATE_RECONNECT 10 /* up, reconnect to clients */ +#define CEPH_MDS_STATE_REJOIN 11 /* up, rejoining distributed cache */ +#define CEPH_MDS_STATE_CLIENTREPLAY 12 /* up, replaying client operations */ +#define CEPH_MDS_STATE_ACTIVE 13 /* up, active */ +#define CEPH_MDS_STATE_STOPPING 14 /* up, but exporting metadata */ + +extern const char *ceph_mds_state_name(int s); + + +/* + * metadata lock types. + * - these are bitmasks.. 
we can compose them + * - they also define the lock ordering by the MDS + * - a few of these are internal to the mds + */ +#define CEPH_LOCK_DVERSION 1 +#define CEPH_LOCK_DN 2 +#define CEPH_LOCK_ISNAP 16 +#define CEPH_LOCK_IVERSION 32 /* mds internal */ +#define CEPH_LOCK_IFILE 64 +#define CEPH_LOCK_IAUTH 128 +#define CEPH_LOCK_ILINK 256 +#define CEPH_LOCK_IDFT 512 /* dir frag tree */ +#define CEPH_LOCK_INEST 1024 /* mds internal */ +#define CEPH_LOCK_IXATTR 2048 +#define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */ +#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ + +/* client_session ops */ +enum { + CEPH_SESSION_REQUEST_OPEN, + CEPH_SESSION_OPEN, + CEPH_SESSION_REQUEST_CLOSE, + CEPH_SESSION_CLOSE, + CEPH_SESSION_REQUEST_RENEWCAPS, + CEPH_SESSION_RENEWCAPS, + CEPH_SESSION_STALE, + CEPH_SESSION_RECALL_STATE, +}; + +extern const char *ceph_session_op_name(int op); + +struct ceph_mds_session_head { + __le32 op; + __le64 seq; + struct ceph_timespec stamp; + __le32 max_caps, max_leases; +} __attribute__ ((packed)); + +/* client_request */ +/* + * metadata ops. + * & 0x001000 -> write op + * & 0x010000 -> follow symlink (e.g. stat(), not lstat()). 
+ * & 0x100000 -> use weird ino/path trace + */ +#define CEPH_MDS_OP_WRITE 0x001000 +enum { + CEPH_MDS_OP_LOOKUP = 0x00100, + CEPH_MDS_OP_GETATTR = 0x00101, + CEPH_MDS_OP_LOOKUPHASH = 0x00102, + CEPH_MDS_OP_LOOKUPPARENT = 0x00103, + + CEPH_MDS_OP_SETXATTR = 0x01105, + CEPH_MDS_OP_RMXATTR = 0x01106, + CEPH_MDS_OP_SETLAYOUT = 0x01107, + CEPH_MDS_OP_SETATTR = 0x01108, + CEPH_MDS_OP_SETFILELOCK= 0x01109, + CEPH_MDS_OP_GETFILELOCK= 0x00110, + CEPH_MDS_OP_SETDIRLAYOUT=0x0110a, + + CEPH_MDS_OP_MKNOD = 0x01201, + CEPH_MDS_OP_LINK = 0x01202, + CEPH_MDS_OP_UNLINK = 0x01203, + CEPH_MDS_OP_RENAME = 0x01204, + CEPH_MDS_OP_MKDIR = 0x01220, + CEPH_MDS_OP_RMDIR = 0x01221, + CEPH_MDS_OP_SYMLINK = 0x01222, + + CEPH_MDS_OP_CREATE = 0x01301, + CEPH_MDS_OP_OPEN = 0x00302, + CEPH_MDS_OP_READDIR = 0x00305, + + CEPH_MDS_OP_LOOKUPSNAP = 0x00400, + CEPH_MDS_OP_MKSNAP = 0x01400, + CEPH_MDS_OP_RMSNAP = 0x01401, + CEPH_MDS_OP_LSSNAP = 0x00402, +}; + +extern const char *ceph_mds_op_name(int op); + + +#define CEPH_SETATTR_MODE 1 +#define CEPH_SETATTR_UID 2 +#define CEPH_SETATTR_GID 4 +#define CEPH_SETATTR_MTIME 8 +#define CEPH_SETATTR_ATIME 16 +#define CEPH_SETATTR_SIZE 32 +#define CEPH_SETATTR_CTIME 64 + +union ceph_mds_request_args { + struct { + __le32 mask; /* CEPH_CAP_* */ + } __attribute__ ((packed)) getattr; + struct { + __le32 mode; + __le32 uid; + __le32 gid; + struct ceph_timespec mtime; + struct ceph_timespec atime; + __le64 size, old_size; /* old_size needed by truncate */ + __le32 mask; /* CEPH_SETATTR_* */ + } __attribute__ ((packed)) setattr; + struct { + __le32 frag; /* which dir fragment */ + __le32 max_entries; /* how many dentries to grab */ + __le32 max_bytes; + } __attribute__ ((packed)) readdir; + struct { + __le32 mode; + __le32 rdev; + } __attribute__ ((packed)) mknod; + struct { + __le32 mode; + } __attribute__ ((packed)) mkdir; + struct { + __le32 flags; + __le32 mode; + __le32 stripe_unit; /* layout for newly created file */ + __le32 stripe_count; /* ... 
*/ + __le32 object_size; + __le32 file_replication; + __le32 preferred; + } __attribute__ ((packed)) open; + struct { + __le32 flags; + } __attribute__ ((packed)) setxattr; + struct { + struct ceph_file_layout layout; + } __attribute__ ((packed)) setlayout; + struct { + __u8 rule; /* currently fcntl or flock */ + __u8 type; /* shared, exclusive, remove*/ + __le64 pid; /* process id requesting the lock */ + __le64 pid_namespace; + __le64 start; /* initial location to lock */ + __le64 length; /* num bytes to lock from start */ + __u8 wait; /* will caller wait for lock to become available? */ + } __attribute__ ((packed)) filelock_change; +} __attribute__ ((packed)); + +#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ +#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */ + +struct ceph_mds_request_head { + __le64 oldest_client_tid; + __le32 mdsmap_epoch; /* on client */ + __le32 flags; /* CEPH_MDS_FLAG_* */ + __u8 num_retry, num_fwd; /* count retry, fwd attempts */ + __le16 num_releases; /* # include cap/lease release records */ + __le32 op; /* mds op code */ + __le32 caller_uid, caller_gid; + __le64 ino; /* use this ino for openc, mkdir, mknod, + etc. (if replaying) */ + union ceph_mds_request_args args; +} __attribute__ ((packed)); + +/* cap/lease release record */ +struct ceph_mds_request_release { + __le64 ino, cap_id; /* ino and unique cap id */ + __le32 caps, wanted; /* new issued, wanted */ + __le32 seq, issue_seq, mseq; + __le32 dname_seq; /* if releasing a dentry lease, a */ + __le32 dname_len; /* string follows. */ +} __attribute__ ((packed)); + +/* client reply */ +struct ceph_mds_reply_head { + __le32 op; + __le32 result; + __le32 mdsmap_epoch; + __u8 safe; /* true if committed to disk */ + __u8 is_dentry, is_target; /* true if dentry, target inode records + are included with reply */ +} __attribute__ ((packed)); + +/* one for each node split */ +struct ceph_frag_tree_split { + __le32 frag; /* this frag splits... 
*/ + __le32 by; /* ...by this many bits */ +} __attribute__ ((packed)); + +struct ceph_frag_tree_head { + __le32 nsplits; /* num ceph_frag_tree_split records */ + struct ceph_frag_tree_split splits[]; +} __attribute__ ((packed)); + +/* capability issue, for bundling with mds reply */ +struct ceph_mds_reply_cap { + __le32 caps, wanted; /* caps issued, wanted */ + __le64 cap_id; + __le32 seq, mseq; + __le64 realm; /* snap realm */ + __u8 flags; /* CEPH_CAP_FLAG_* */ +} __attribute__ ((packed)); + +#define CEPH_CAP_FLAG_AUTH 1 /* cap is issued by auth mds */ + +/* inode record, for bundling with mds reply */ +struct ceph_mds_reply_inode { + __le64 ino; + __le64 snapid; + __le32 rdev; + __le64 version; /* inode version */ + __le64 xattr_version; /* version for xattr blob */ + struct ceph_mds_reply_cap cap; /* caps issued for this inode */ + struct ceph_file_layout layout; + struct ceph_timespec ctime, mtime, atime; + __le32 time_warp_seq; + __le64 size, max_size, truncate_size; + __le32 truncate_seq; + __le32 mode, uid, gid; + __le32 nlink; + __le64 files, subdirs, rbytes, rfiles, rsubdirs; /* dir stats */ + struct ceph_timespec rctime; + struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */ +} __attribute__ ((packed)); +/* followed by frag array, then symlink string, then xattr blob */ + +/* reply_lease follows dname, and reply_inode */ +struct ceph_mds_reply_lease { + __le16 mask; /* lease type(s) */ + __le32 duration_ms; /* lease duration */ + __le32 seq; +} __attribute__ ((packed)); + +struct ceph_mds_reply_dirfrag { + __le32 frag; /* fragment */ + __le32 auth; /* auth mds, if this is a delegation point */ + __le32 ndist; /* number of mds' this is replicated on */ + __le32 dist[]; +} __attribute__ ((packed)); + +#define CEPH_LOCK_FCNTL 1 +#define CEPH_LOCK_FLOCK 2 + +#define CEPH_LOCK_SHARED 1 +#define CEPH_LOCK_EXCL 2 +#define CEPH_LOCK_UNLOCK 4 + +struct ceph_filelock { + __le64 start;/* file offset to start lock at */ + __le64 length; /* num 
bytes to lock; 0 for all following start */ + __le64 client; /* which client holds the lock */ + __le64 pid; /* process id holding the lock on the client */ + __le64 pid_namespace; + __u8 type; /* shared lock, exclusive lock, or unlock */ +} __attribute__ ((packed)); + + +/* file access modes */ +#define CEPH_FILE_MODE_PIN 0 +#define CEPH_FILE_MODE_RD 1 +#define CEPH_FILE_MODE_WR 2 +#define CEPH_FILE_MODE_RDWR 3 /* RD | WR */ +#define CEPH_FILE_MODE_LAZY 4 /* lazy io */ +#define CEPH_FILE_MODE_NUM 8 /* bc these are bit fields.. mostly */ + +int ceph_flags_to_mode(int flags); + + +/* capability bits */ +#define CEPH_CAP_PIN 1 /* no specific capabilities beyond the pin */ + +/* generic cap bits */ +#define CEPH_CAP_GSHARED 1 /* client can reads */ +#define CEPH_CAP_GEXCL 2 /* client can read and update */ +#define CEPH_CAP_GCACHE 4 /* (file) client can cache reads */ +#define CEPH_CAP_GRD 8 /* (file) client can read */ +#define CEPH_CAP_GWR 16 /* (file) client can write */ +#define CEPH_CAP_GBUFFER 32 /* (file) client can buffer writes */ +#define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */ +#define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */ + +/* per-lock shift */ +#define CEPH_CAP_SAUTH 2 +#define CEPH_CAP_SLINK 4 +#define CEPH_CAP_SXATTR 6 +#define CEPH_CAP_SFILE 8 +#define CEPH_CAP_SFLOCK 20 + +#define CEPH_CAP_BITS 22 + +/* composed values */ +#define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) +#define CEPH_CAP_AUTH_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SAUTH) +#define CEPH_CAP_LINK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SLINK) +#define CEPH_CAP_LINK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SLINK) +#define CEPH_CAP_XATTR_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SXATTR) +#define CEPH_CAP_XATTR_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SXATTR) +#define CEPH_CAP_FILE(x) (x << CEPH_CAP_SFILE) +#define CEPH_CAP_FILE_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFILE) +#define CEPH_CAP_FILE_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFILE) +#define CEPH_CAP_FILE_CACHE 
(CEPH_CAP_GCACHE << CEPH_CAP_SFILE) +#define CEPH_CAP_FILE_RD (CEPH_CAP_GRD << CEPH_CAP_SFILE) +#define CEPH_CAP_FILE_WR (CEPH_CAP_GWR << CEPH_CAP_SFILE) +#define CEPH_CAP_FILE_BUFFER (CEPH_CAP_GBUFFER << CEPH_CAP_SFILE) +#define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE) +#define CEPH_CAP_FILE_LAZYIO (CEPH_CAP_GLAZYIO << CEPH_CAP_SFILE) +#define CEPH_CAP_FLOCK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFLOCK) +#define CEPH_CAP_FLOCK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFLOCK) + + +/* cap masks (for getattr) */ +#define CEPH_STAT_CAP_INODE CEPH_CAP_PIN +#define CEPH_STAT_CAP_TYPE CEPH_CAP_PIN /* mode >> 12 */ +#define CEPH_STAT_CAP_SYMLINK CEPH_CAP_PIN +#define CEPH_STAT_CAP_UID CEPH_CAP_AUTH_SHARED +#define CEPH_STAT_CAP_GID CEPH_CAP_AUTH_SHARED +#define CEPH_STAT_CAP_MODE CEPH_CAP_AUTH_SHARED +#define CEPH_STAT_CAP_NLINK CEPH_CAP_LINK_SHARED +#define CEPH_STAT_CAP_LAYOUT CEPH_CAP_FILE_SHARED +#define CEPH_STAT_CAP_MTIME CEPH_CAP_FILE_SHARED +#define CEPH_STAT_CAP_SIZE CEPH_CAP_FILE_SHARED +#define CEPH_STAT_CAP_ATIME CEPH_CAP_FILE_SHARED /* fixme */ +#define CEPH_STAT_CAP_XATTR CEPH_CAP_XATTR_SHARED +#define CEPH_STAT_CAP_INODE_ALL (CEPH_CAP_PIN | \ + CEPH_CAP_AUTH_SHARED | \ + CEPH_CAP_LINK_SHARED | \ + CEPH_CAP_FILE_SHARED | \ + CEPH_CAP_XATTR_SHARED) + +#define CEPH_CAP_ANY_SHARED (CEPH_CAP_AUTH_SHARED | \ + CEPH_CAP_LINK_SHARED | \ + CEPH_CAP_XATTR_SHARED | \ + CEPH_CAP_FILE_SHARED) +#define CEPH_CAP_ANY_RD (CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_RD | \ + CEPH_CAP_FILE_CACHE) + +#define CEPH_CAP_ANY_EXCL (CEPH_CAP_AUTH_EXCL | \ + CEPH_CAP_LINK_EXCL | \ + CEPH_CAP_XATTR_EXCL | \ + CEPH_CAP_FILE_EXCL) +#define CEPH_CAP_ANY_FILE_WR (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | \ + CEPH_CAP_FILE_EXCL) +#define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR) +#define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \ + CEPH_CAP_ANY_FILE_WR | CEPH_CAP_FILE_LAZYIO | \ + CEPH_CAP_PIN) + +#define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | 
CEPH_LOCK_ILINK | \ + CEPH_LOCK_IXATTR) + +int ceph_caps_for_mode(int mode); + +enum { + CEPH_CAP_OP_GRANT, /* mds->client grant */ + CEPH_CAP_OP_REVOKE, /* mds->client revoke */ + CEPH_CAP_OP_TRUNC, /* mds->client trunc notify */ + CEPH_CAP_OP_EXPORT, /* mds has exported the cap */ + CEPH_CAP_OP_IMPORT, /* mds has imported the cap */ + CEPH_CAP_OP_UPDATE, /* client->mds update */ + CEPH_CAP_OP_DROP, /* client->mds drop cap bits */ + CEPH_CAP_OP_FLUSH, /* client->mds cap writeback */ + CEPH_CAP_OP_FLUSH_ACK, /* mds->client flushed */ + CEPH_CAP_OP_FLUSHSNAP, /* client->mds flush snapped metadata */ + CEPH_CAP_OP_FLUSHSNAP_ACK, /* mds->client flushed snapped metadata */ + CEPH_CAP_OP_RELEASE, /* client->mds release (clean) cap */ + CEPH_CAP_OP_RENEW, /* client->mds renewal request */ +}; + +extern const char *ceph_cap_op_name(int op); + +/* + * caps message, used for capability callbacks, acks, requests, etc. + */ +struct ceph_mds_caps { + __le32 op; /* CEPH_CAP_OP_* */ + __le64 ino, realm; + __le64 cap_id; + __le32 seq, issue_seq; + __le32 caps, wanted, dirty; /* latest issued/wanted/dirty */ + __le32 migrate_seq; + __le64 snap_follows; + __le32 snap_trace_len; + + /* authlock */ + __le32 uid, gid, mode; + + /* linklock */ + __le32 nlink; + + /* xattrlock */ + __le32 xattr_len; + __le64 xattr_version; + + /* filelock */ + __le64 size, max_size, truncate_size; + __le32 truncate_seq; + struct ceph_timespec mtime, atime, ctime; + struct ceph_file_layout layout; + __le32 time_warp_seq; +} __attribute__ ((packed)); + +/* cap release msg head */ +struct ceph_mds_cap_release { + __le32 num; /* number of cap_items that follow */ +} __attribute__ ((packed)); + +struct ceph_mds_cap_item { + __le64 ino; + __le64 cap_id; + __le32 migrate_seq, seq; +} __attribute__ ((packed)); + +#define CEPH_MDS_LEASE_REVOKE 1 /* mds -> client */ +#define CEPH_MDS_LEASE_RELEASE 2 /* client -> mds */ +#define CEPH_MDS_LEASE_RENEW 3 /* client <-> mds */ +#define CEPH_MDS_LEASE_REVOKE_ACK 4 /* 
client -> mds */ + +extern const char *ceph_lease_op_name(int o); + +/* lease msg header */ +struct ceph_mds_lease { + __u8 action; /* CEPH_MDS_LEASE_* */ + __le16 mask; /* which lease */ + __le64 ino; + __le64 first, last; /* snap range */ + __le32 seq; + __le32 duration_ms; /* duration of renewal */ +} __attribute__ ((packed)); +/* followed by a __le32+string for dname */ + +/* client reconnect */ +struct ceph_mds_cap_reconnect { + __le64 cap_id; + __le32 wanted; + __le32 issued; + __le64 snaprealm; + __le64 pathbase; /* base ino for our path to this ino */ + __le32 flock_len; /* size of flock state blob, if any */ +} __attribute__ ((packed)); +/* followed by flock blob */ + +struct ceph_mds_cap_reconnect_v1 { + __le64 cap_id; + __le32 wanted; + __le32 issued; + __le64 size; + struct ceph_timespec mtime, atime; + __le64 snaprealm; + __le64 pathbase; /* base ino for our path to this ino */ +} __attribute__ ((packed)); + +struct ceph_mds_snaprealm_reconnect { + __le64 ino; /* snap realm base */ + __le64 seq; /* snap seq for this snap realm */ + __le64 parent; /* parent realm */ +} __attribute__ ((packed)); + +/* + * snaps + */ +enum { + CEPH_SNAP_OP_UPDATE, /* CREATE or DESTROY */ + CEPH_SNAP_OP_CREATE, + CEPH_SNAP_OP_DESTROY, + CEPH_SNAP_OP_SPLIT, +}; + +extern const char *ceph_snap_op_name(int o); + +/* snap msg header */ +struct ceph_mds_snap_head { + __le32 op; /* CEPH_SNAP_OP_* */ + __le64 split; /* ino to split off, if any */ + __le32 num_split_inos; /* # inos belonging to new child realm */ + __le32 num_split_realms; /* # child realms under new child realm */ + __le32 trace_len; /* size of snap trace blob */ +} __attribute__ ((packed)); +/* followed by split ino list, then split realms, then the trace blob */ + +/* + * encode info about a snaprealm, as viewed by a client + */ +struct ceph_mds_snap_realm { + __le64 ino; /* ino */ + __le64 created; /* snap: when created */ + __le64 parent; /* ino: parent realm */ + __le64 parent_since; /* snap: same parent 
since */ + __le64 seq; /* snap: version */ + __le32 num_snaps; + __le32 num_prior_parent_snaps; +} __attribute__ ((packed)); +/* followed by my snap list, then prior parent snap list */ + +#endif diff --git a/include/linux/ceph/ceph_hash.h b/include/linux/ceph/ceph_hash.h new file mode 100644 index 000000000000..d099c3f90236 --- /dev/null +++ b/include/linux/ceph/ceph_hash.h @@ -0,0 +1,13 @@ +#ifndef FS_CEPH_HASH_H +#define FS_CEPH_HASH_H + +#define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */ +#define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */ + +extern unsigned ceph_str_hash_linux(const char *s, unsigned len); +extern unsigned ceph_str_hash_rjenkins(const char *s, unsigned len); + +extern unsigned ceph_str_hash(int type, const char *s, unsigned len); +extern const char *ceph_str_hash_name(int type); + +#endif diff --git a/include/linux/ceph/debugfs.h b/include/linux/ceph/debugfs.h new file mode 100644 index 000000000000..2a79702e092b --- /dev/null +++ b/include/linux/ceph/debugfs.h @@ -0,0 +1,33 @@ +#ifndef _FS_CEPH_DEBUGFS_H +#define _FS_CEPH_DEBUGFS_H + +#include "ceph_debug.h" +#include "types.h" + +#define CEPH_DEFINE_SHOW_FUNC(name) \ +static int name##_open(struct inode *inode, struct file *file) \ +{ \ + struct seq_file *sf; \ + int ret; \ + \ + ret = single_open(file, name, NULL); \ + sf = file->private_data; \ + sf->private = inode->i_private; \ + return ret; \ +} \ + \ +static const struct file_operations name##_fops = { \ + .open = name##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +}; + +/* debugfs.c */ +extern int ceph_debugfs_init(void); +extern void ceph_debugfs_cleanup(void); +extern int ceph_debugfs_client_init(struct ceph_client *client); +extern void ceph_debugfs_client_cleanup(struct ceph_client *client); + +#endif + diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h new file mode 100644 index 000000000000..c5b6939fb32a --- /dev/null +++ b/include/linux/ceph/decode.h @@ 
-0,0 +1,201 @@ +#ifndef __CEPH_DECODE_H +#define __CEPH_DECODE_H + +#include <asm/unaligned.h> +#include <linux/time.h> + +#include "types.h" + +/* + * in all cases, + * void **p pointer to position pointer + * void *end pointer to end of buffer (last byte + 1) + */ + +static inline u64 ceph_decode_64(void **p) +{ + u64 v = get_unaligned_le64(*p); + *p += sizeof(u64); + return v; +} +static inline u32 ceph_decode_32(void **p) +{ + u32 v = get_unaligned_le32(*p); + *p += sizeof(u32); + return v; +} +static inline u16 ceph_decode_16(void **p) +{ + u16 v = get_unaligned_le16(*p); + *p += sizeof(u16); + return v; +} +static inline u8 ceph_decode_8(void **p) +{ + u8 v = *(u8 *)*p; + (*p)++; + return v; +} +static inline void ceph_decode_copy(void **p, void *pv, size_t n) +{ + memcpy(pv, *p, n); + *p += n; +} + +/* + * bounds check input. + */ +#define ceph_decode_need(p, end, n, bad) \ + do { \ + if (unlikely(*(p) + (n) > (end))) \ + goto bad; \ + } while (0) + +#define ceph_decode_64_safe(p, end, v, bad) \ + do { \ + ceph_decode_need(p, end, sizeof(u64), bad); \ + v = ceph_decode_64(p); \ + } while (0) +#define ceph_decode_32_safe(p, end, v, bad) \ + do { \ + ceph_decode_need(p, end, sizeof(u32), bad); \ + v = ceph_decode_32(p); \ + } while (0) +#define ceph_decode_16_safe(p, end, v, bad) \ + do { \ + ceph_decode_need(p, end, sizeof(u16), bad); \ + v = ceph_decode_16(p); \ + } while (0) +#define ceph_decode_8_safe(p, end, v, bad) \ + do { \ + ceph_decode_need(p, end, sizeof(u8), bad); \ + v = ceph_decode_8(p); \ + } while (0) + +#define ceph_decode_copy_safe(p, end, pv, n, bad) \ + do { \ + ceph_decode_need(p, end, n, bad); \ + ceph_decode_copy(p, pv, n); \ + } while (0) + +/* + * struct ceph_timespec <-> struct timespec + */ +static inline void ceph_decode_timespec(struct timespec *ts, + const struct ceph_timespec *tv) +{ + ts->tv_sec = le32_to_cpu(tv->tv_sec); + ts->tv_nsec = le32_to_cpu(tv->tv_nsec); +} +static inline void ceph_encode_timespec(struct ceph_timespec 
*tv, + const struct timespec *ts) +{ + tv->tv_sec = cpu_to_le32(ts->tv_sec); + tv->tv_nsec = cpu_to_le32(ts->tv_nsec); +} + +/* + * sockaddr_storage <-> ceph_sockaddr + */ +static inline void ceph_encode_addr(struct ceph_entity_addr *a) +{ + __be16 ss_family = htons(a->in_addr.ss_family); + a->in_addr.ss_family = *(__u16 *)&ss_family; +} +static inline void ceph_decode_addr(struct ceph_entity_addr *a) +{ + __be16 ss_family = *(__be16 *)&a->in_addr.ss_family; + a->in_addr.ss_family = ntohs(ss_family); + WARN_ON(a->in_addr.ss_family == 512); +} + +/* + * encoders + */ +static inline void ceph_encode_64(void **p, u64 v) +{ + put_unaligned_le64(v, (__le64 *)*p); + *p += sizeof(u64); +} +static inline void ceph_encode_32(void **p, u32 v) +{ + put_unaligned_le32(v, (__le32 *)*p); + *p += sizeof(u32); +} +static inline void ceph_encode_16(void **p, u16 v) +{ + put_unaligned_le16(v, (__le16 *)*p); + *p += sizeof(u16); +} +static inline void ceph_encode_8(void **p, u8 v) +{ + *(u8 *)*p = v; + (*p)++; +} +static inline void ceph_encode_copy(void **p, const void *s, int len) +{ + memcpy(*p, s, len); + *p += len; +} + +/* + * filepath, string encoders + */ +static inline void ceph_encode_filepath(void **p, void *end, + u64 ino, const char *path) +{ + u32 len = path ? 
strlen(path) : 0; + BUG_ON(*p + sizeof(ino) + sizeof(len) + len > end); + ceph_encode_8(p, 1); + ceph_encode_64(p, ino); + ceph_encode_32(p, len); + if (len) + memcpy(*p, path, len); + *p += len; +} + +static inline void ceph_encode_string(void **p, void *end, + const char *s, u32 len) +{ + BUG_ON(*p + sizeof(len) + len > end); + ceph_encode_32(p, len); + if (len) + memcpy(*p, s, len); + *p += len; +} + +#define ceph_encode_need(p, end, n, bad) \ + do { \ + if (unlikely(*(p) + (n) > (end))) \ + goto bad; \ + } while (0) + +#define ceph_encode_64_safe(p, end, v, bad) \ + do { \ + ceph_encode_need(p, end, sizeof(u64), bad); \ + ceph_encode_64(p, v); \ + } while (0) +#define ceph_encode_32_safe(p, end, v, bad) \ + do { \ + ceph_encode_need(p, end, sizeof(u32), bad); \ + ceph_encode_32(p, v); \ + } while (0) +#define ceph_encode_16_safe(p, end, v, bad) \ + do { \ + ceph_encode_need(p, end, sizeof(u16), bad); \ + ceph_encode_16(p, v); \ + } while (0) + +#define ceph_encode_copy_safe(p, end, pv, n, bad) \ + do { \ + ceph_encode_need(p, end, n, bad); \ + ceph_encode_copy(p, pv, n); \ + } while (0) +#define ceph_encode_string_safe(p, end, s, n, bad) \ + do { \ + ceph_encode_need(p, end, n, bad); \ + ceph_encode_string(p, end, s, n); \ + } while (0) + + +#endif diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h new file mode 100644 index 000000000000..f22b2e941686 --- /dev/null +++ b/include/linux/ceph/libceph.h @@ -0,0 +1,249 @@ +#ifndef _FS_CEPH_LIBCEPH_H +#define _FS_CEPH_LIBCEPH_H + +#include "ceph_debug.h" + +#include <asm/unaligned.h> +#include <linux/backing-dev.h> +#include <linux/completion.h> +#include <linux/exportfs.h> +#include <linux/fs.h> +#include <linux/mempool.h> +#include <linux/pagemap.h> +#include <linux/wait.h> +#include <linux/writeback.h> +#include <linux/slab.h> + +#include "types.h" +#include "messenger.h" +#include "msgpool.h" +#include "mon_client.h" +#include "osd_client.h" +#include "ceph_fs.h" + +/* + * Supported features 
+ */ +#define CEPH_FEATURE_SUPPORTED_DEFAULT CEPH_FEATURE_NOSRCADDR +#define CEPH_FEATURE_REQUIRED_DEFAULT CEPH_FEATURE_NOSRCADDR + +/* + * mount options + */ +#define CEPH_OPT_FSID (1<<0) +#define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ +#define CEPH_OPT_MYIP (1<<2) /* specified my ip */ +#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */ + +#define CEPH_OPT_DEFAULT (0); + +#define ceph_set_opt(client, opt) \ + (client)->options->flags |= CEPH_OPT_##opt; +#define ceph_test_opt(client, opt) \ + (!!((client)->options->flags & CEPH_OPT_##opt)) + +struct ceph_options { + int flags; + struct ceph_fsid fsid; + struct ceph_entity_addr my_addr; + int mount_timeout; + int osd_idle_ttl; + int osd_timeout; + int osd_keepalive_timeout; + + /* + * any type that can't be simply compared or doesn't need + * to be compared should go beyond this point, + * ceph_compare_options() should be updated accordingly + */ + + struct ceph_entity_addr *mon_addr; /* should be the first + pointer type of args */ + int num_mon; + char *name; + char *secret; +}; + +/* + * defaults + */ +#define CEPH_MOUNT_TIMEOUT_DEFAULT 60 +#define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */ +#define CEPH_OSD_KEEPALIVE_DEFAULT 5 +#define CEPH_OSD_IDLE_TTL_DEFAULT 60 +#define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ + +#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) +#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) + +#define CEPH_AUTH_NAME_DEFAULT "guest" + +/* + * Delay telling the MDS we no longer want caps, in case we reopen + * the file. Delay a minimum amount of time, even if we send a cap + * message for some other reason. Otherwise, take the opportunity to + * update the mds to avoid sending another message later. 
+ */ +#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ +#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ + +#define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) + +/* mount state */ +enum { + CEPH_MOUNT_MOUNTING, + CEPH_MOUNT_MOUNTED, + CEPH_MOUNT_UNMOUNTING, + CEPH_MOUNT_UNMOUNTED, + CEPH_MOUNT_SHUTDOWN, +}; + +/* + * subtract jiffies + */ +static inline unsigned long time_sub(unsigned long a, unsigned long b) +{ + BUG_ON(time_after(b, a)); + return (long)a - (long)b; +} + +struct ceph_mds_client; + +/* + * per client state + * + * possibly shared by multiple mount points, if they are + * mounting the same ceph filesystem/cluster. + */ +struct ceph_client { + struct ceph_fsid fsid; + bool have_fsid; + + void *private; + + struct ceph_options *options; + + struct mutex mount_mutex; /* serialize mount attempts */ + wait_queue_head_t auth_wq; + int auth_err; + + int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *); + + u32 supported_features; + u32 required_features; + + struct ceph_messenger *msgr; /* messenger instance */ + struct ceph_mon_client monc; + struct ceph_osd_client osdc; + +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs_dir; + struct dentry *debugfs_monmap; + struct dentry *debugfs_osdmap; +#endif +}; + + + +/* + * snapshots + */ + +/* + * A "snap context" is the set of existing snapshots when we + * write data. It is used by the OSD to guide its COW behavior. + * + * The ceph_snap_context is refcounted, and attached to each dirty + * page, indicating which context the dirty data belonged when it was + * dirtied. 
+ */ +struct ceph_snap_context { + atomic_t nref; + u64 seq; + int num_snaps; + u64 snaps[]; +}; + +static inline struct ceph_snap_context * +ceph_get_snap_context(struct ceph_snap_context *sc) +{ + /* + printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), + atomic_read(&sc->nref)+1); + */ + if (sc) + atomic_inc(&sc->nref); + return sc; +} + +static inline void ceph_put_snap_context(struct ceph_snap_context *sc) +{ + if (!sc) + return; + /* + printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), + atomic_read(&sc->nref)-1); + */ + if (atomic_dec_and_test(&sc->nref)) { + /*printk(" deleting snap_context %p\n", sc);*/ + kfree(sc); + } +} + +/* + * calculate the number of pages a given length and offset map onto, + * if we align the data. + */ +static inline int calc_pages_for(u64 off, u64 len) +{ + return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) - + (off >> PAGE_CACHE_SHIFT); +} + +/* ceph_common.c */ +extern const char *ceph_msg_type_name(int type); +extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); +extern struct kmem_cache *ceph_inode_cachep; +extern struct kmem_cache *ceph_cap_cachep; +extern struct kmem_cache *ceph_dentry_cachep; +extern struct kmem_cache *ceph_file_cachep; + +extern int ceph_parse_options(struct ceph_options **popt, char *options, + const char *dev_name, const char *dev_name_end, + int (*parse_extra_token)(char *c, void *private), + void *private); +extern void ceph_destroy_options(struct ceph_options *opt); +extern int ceph_compare_options(struct ceph_options *new_opt, + struct ceph_client *client); +extern struct ceph_client *ceph_create_client(struct ceph_options *opt, + void *private); +extern u64 ceph_client_id(struct ceph_client *client); +extern void ceph_destroy_client(struct ceph_client *client); +extern int __ceph_open_session(struct ceph_client *client, + unsigned long started); +extern int ceph_open_session(struct ceph_client *client); + +/* pagevec.c */ +extern 
void ceph_release_page_vector(struct page **pages, int num_pages); + +extern struct page **ceph_get_direct_page_vector(const char __user *data, + int num_pages, + loff_t off, size_t len); +extern void ceph_put_page_vector(struct page **pages, int num_pages); +extern void ceph_release_page_vector(struct page **pages, int num_pages); +extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); +extern int ceph_copy_user_to_page_vector(struct page **pages, + const char __user *data, + loff_t off, size_t len); +extern int ceph_copy_to_page_vector(struct page **pages, + const char *data, + loff_t off, size_t len); +extern int ceph_copy_from_page_vector(struct page **pages, + char *data, + loff_t off, size_t len); +extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data, + loff_t off, size_t len); +extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); + + +#endif /* _FS_CEPH_SUPER_H */ diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h new file mode 100644 index 000000000000..4c5cb0880bba --- /dev/null +++ b/include/linux/ceph/mdsmap.h @@ -0,0 +1,62 @@ +#ifndef _FS_CEPH_MDSMAP_H +#define _FS_CEPH_MDSMAP_H + +#include "types.h" + +/* + * mds map - describe servers in the mds cluster. 
+ * + * we limit fields to those the client actually cares about + */ +struct ceph_mds_info { + u64 global_id; + struct ceph_entity_addr addr; + s32 state; + int num_export_targets; + bool laggy; + u32 *export_targets; +}; + +struct ceph_mdsmap { + u32 m_epoch, m_client_epoch, m_last_failure; + u32 m_root; + u32 m_session_timeout; /* seconds */ + u32 m_session_autoclose; /* seconds */ + u64 m_max_file_size; + u32 m_max_mds; /* size of m_addr, m_state arrays */ + struct ceph_mds_info *m_info; + + /* which object pools file data can be stored in */ + int m_num_data_pg_pools; + u32 *m_data_pg_pools; + u32 m_cas_pg_pool; +}; + +static inline struct ceph_entity_addr * +ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) +{ + if (w >= m->m_max_mds) + return NULL; + return &m->m_info[w].addr; +} + +static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w) +{ + BUG_ON(w < 0); + if (w >= m->m_max_mds) + return CEPH_MDS_STATE_DNE; + return m->m_info[w].state; +} + +static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) +{ + if (w >= 0 && w < m->m_max_mds) + return m->m_info[w].laggy; + return false; +} + +extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); +extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); +extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); + +#endif diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h new file mode 100644 index 000000000000..5956d62c3057 --- /dev/null +++ b/include/linux/ceph/messenger.h @@ -0,0 +1,261 @@ +#ifndef __FS_CEPH_MESSENGER_H +#define __FS_CEPH_MESSENGER_H + +#include <linux/kref.h> +#include <linux/mutex.h> +#include <linux/net.h> +#include <linux/radix-tree.h> +#include <linux/uio.h> +#include <linux/version.h> +#include <linux/workqueue.h> + +#include "types.h" +#include "buffer.h" + +struct ceph_msg; +struct ceph_connection; + +extern struct workqueue_struct *ceph_msgr_wq; /* receive work queue */ + +/* + * Ceph defines these callbacks for
handling connection events. + */ +struct ceph_connection_operations { + struct ceph_connection *(*get)(struct ceph_connection *); + void (*put)(struct ceph_connection *); + + /* handle an incoming message. */ + void (*dispatch) (struct ceph_connection *con, struct ceph_msg *m); + + /* authorize an outgoing connection */ + int (*get_authorizer) (struct ceph_connection *con, + void **buf, int *len, int *proto, + void **reply_buf, int *reply_len, int force_new); + int (*verify_authorizer_reply) (struct ceph_connection *con, int len); + int (*invalidate_authorizer)(struct ceph_connection *con); + + /* protocol version mismatch */ + void (*bad_proto) (struct ceph_connection *con); + + /* there was some error on the socket (disconnect, whatever) */ + void (*fault) (struct ceph_connection *con); + + /* a remote host has terminated a message exchange session, and messages + * we sent (or they tried to send us) may be lost. */ + void (*peer_reset) (struct ceph_connection *con); + + struct ceph_msg * (*alloc_msg) (struct ceph_connection *con, + struct ceph_msg_header *hdr, + int *skip); +}; + +/* use format string %s%d */ +#define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num) + +struct ceph_messenger { + struct ceph_entity_inst inst; /* my name+address */ + struct ceph_entity_addr my_enc_addr; + struct page *zero_page; /* used in certain error cases */ + + bool nocrc; + + /* + * the global_seq counts connections i (attempt to) initiate + * in order to disambiguate certain connect race conditions. + */ + u32 global_seq; + spinlock_t global_seq_lock; + + u32 supported_features; + u32 required_features; +}; + +/* + * a single message. it contains a header (src, dest, message type, etc.), + * footer (crc values, mainly), a "front" message body, and possibly a + * data payload (stored in some number of pages).
+ */ +struct ceph_msg { + struct ceph_msg_header hdr; /* header */ + struct ceph_msg_footer footer; /* footer */ + struct kvec front; /* unaligned blobs of message */ + struct ceph_buffer *middle; + struct page **pages; /* data payload. NOT OWNER. */ + unsigned nr_pages; /* size of page array */ + struct ceph_pagelist *pagelist; /* instead of pages */ + struct list_head list_head; + struct kref kref; + struct bio *bio; /* instead of pages/pagelist */ + struct bio *bio_iter; /* bio iterator */ + int bio_seg; /* current bio segment */ + struct ceph_pagelist *trail; /* the trailing part of the data */ + bool front_is_vmalloc; + bool more_to_follow; + bool needs_out_seq; + int front_max; + + struct ceph_msgpool *pool; +}; + +struct ceph_msg_pos { + int page, page_pos; /* which page; offset in page */ + int data_pos; /* offset in data payload */ + int did_page_crc; /* true if we've calculated crc for current page */ +}; + +/* ceph connection fault delay defaults, for exponential backoff */ +#define BASE_DELAY_INTERVAL (HZ/2) +#define MAX_DELAY_INTERVAL (5 * 60 * HZ) + +/* + * ceph_connection state bit flags + * + * QUEUED and BUSY are used together to ensure that only a single + * thread is currently opening, reading or writing data to the socket. + */ +#define LOSSYTX 0 /* we can close channel or drop messages on errors */ +#define CONNECTING 1 +#define NEGOTIATING 2 +#define KEEPALIVE_PENDING 3 +#define WRITE_PENDING 4 /* we have data ready to send */ +#define QUEUED 5 /* there is work queued on this connection */ +#define BUSY 6 /* work is being done */ +#define STANDBY 8 /* no outgoing messages, socket closed. we keep + * the ceph_connection around to maintain shared + * state with the peer. */ +#define CLOSED 10 /* we've closed the connection */ +#define SOCK_CLOSED 11 /* socket state changed to closed */ +#define OPENING 13 /* open connection w/ (possibly new) peer */ +#define DEAD 14 /* dead, about to kfree */ + +/* + * A single connection with another host. 
+ * + * We maintain a queue of outgoing messages, and some session state to + * ensure that we can preserve the lossless, ordered delivery of + * messages in the case of a TCP disconnect. + */ +struct ceph_connection { + void *private; + atomic_t nref; + + const struct ceph_connection_operations *ops; + + struct ceph_messenger *msgr; + struct socket *sock; + unsigned long state; /* connection state (see flags above) */ + const char *error_msg; /* error message, if any */ + + struct ceph_entity_addr peer_addr; /* peer address */ + struct ceph_entity_name peer_name; /* peer name */ + struct ceph_entity_addr peer_addr_for_me; + unsigned peer_features; + u32 connect_seq; /* identify the most recent connection + attempt for this connection, client */ + u32 peer_global_seq; /* peer's global seq for this connection */ + + int auth_retry; /* true if we need a newer authorizer */ + void *auth_reply_buf; /* where to put the authorizer reply */ + int auth_reply_buf_len; + + struct mutex mutex; + + /* out queue */ + struct list_head out_queue; + struct list_head out_sent; /* sending or sent but unacked */ + u64 out_seq; /* last message queued for send */ + bool out_keepalive_pending; + + u64 in_seq, in_seq_acked; /* last message received, acked */ + + /* connection negotiation temps */ + char in_banner[CEPH_BANNER_MAX_LEN]; + union { + struct { /* outgoing connection */ + struct ceph_msg_connect out_connect; + struct ceph_msg_connect_reply in_reply; + }; + struct { /* incoming */ + struct ceph_msg_connect in_connect; + struct ceph_msg_connect_reply out_reply; + }; + }; + struct ceph_entity_addr actual_peer_addr; + + /* message out temps */ + struct ceph_msg *out_msg; /* sending message (== tail of + out_sent) */ + bool out_msg_done; + struct ceph_msg_pos out_msg_pos; + + struct kvec out_kvec[8], /* sending header/footer data */ + *out_kvec_cur; + int out_kvec_left; /* kvec's left in out_kvec */ + int out_skip; /* skip this many bytes */ + int out_kvec_bytes; /* total bytes 
left */ + bool out_kvec_is_msg; /* kvec refers to out_msg */ + int out_more; /* there is more data after the kvecs */ + __le64 out_temp_ack; /* for writing an ack */ + + /* message in temps */ + struct ceph_msg_header in_hdr; + struct ceph_msg *in_msg; + struct ceph_msg_pos in_msg_pos; + u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */ + + char in_tag; /* protocol control byte */ + int in_base_pos; /* bytes read */ + __le64 in_temp_ack; /* for reading an ack */ + + struct delayed_work work; /* send|recv work */ + unsigned long delay; /* current delay interval */ +}; + + +extern const char *ceph_pr_addr(const struct sockaddr_storage *ss); +extern int ceph_parse_ips(const char *c, const char *end, + struct ceph_entity_addr *addr, + int max_count, int *count); + + +extern int ceph_msgr_init(void); +extern void ceph_msgr_exit(void); +extern void ceph_msgr_flush(void); + +extern struct ceph_messenger *ceph_messenger_create( + struct ceph_entity_addr *myaddr, + u32 features, u32 required); +extern void ceph_messenger_destroy(struct ceph_messenger *); + +extern void ceph_con_init(struct ceph_messenger *msgr, + struct ceph_connection *con); +extern void ceph_con_open(struct ceph_connection *con, + struct ceph_entity_addr *addr); +extern bool ceph_con_opened(struct ceph_connection *con); +extern void ceph_con_close(struct ceph_connection *con); +extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg); +extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg); +extern void ceph_con_revoke_message(struct ceph_connection *con, + struct ceph_msg *msg); +extern void ceph_con_keepalive(struct ceph_connection *con); +extern struct ceph_connection *ceph_con_get(struct ceph_connection *con); +extern void ceph_con_put(struct ceph_connection *con); + +extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags); +extern void ceph_msg_kfree(struct ceph_msg *m); + + +static inline struct ceph_msg 
*ceph_msg_get(struct ceph_msg *msg) +{ + kref_get(&msg->kref); + return msg; +} +extern void ceph_msg_last_put(struct kref *kref); +static inline void ceph_msg_put(struct ceph_msg *msg) +{ + kref_put(&msg->kref, ceph_msg_last_put); +} + +extern void ceph_msg_dump(struct ceph_msg *msg); + +#endif diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h new file mode 100644 index 000000000000..545f85917780 --- /dev/null +++ b/include/linux/ceph/mon_client.h @@ -0,0 +1,122 @@ +#ifndef _FS_CEPH_MON_CLIENT_H +#define _FS_CEPH_MON_CLIENT_H + +#include <linux/completion.h> +#include <linux/kref.h> +#include <linux/rbtree.h> + +#include "messenger.h" + +struct ceph_client; +struct ceph_mount_args; +struct ceph_auth_client; + +/* + * The monitor map enumerates the set of all monitors. + */ +struct ceph_monmap { + struct ceph_fsid fsid; + u32 epoch; + u32 num_mon; + struct ceph_entity_inst mon_inst[0]; +}; + +struct ceph_mon_client; +struct ceph_mon_generic_request; + + +/* + * Generic mechanism for resending monitor requests. 
+ */ +typedef void (*ceph_monc_request_func_t)(struct ceph_mon_client *monc, + int newmon); + +/* a pending monitor request */ +struct ceph_mon_request { + struct ceph_mon_client *monc; + struct delayed_work delayed_work; + unsigned long delay; + ceph_monc_request_func_t do_request; +}; + +/* + * ceph_mon_generic_request is being used for the statfs and poolop requests + * which are being done a bit differently because we need to get data back + * to the caller + */ +struct ceph_mon_generic_request { + struct kref kref; + u64 tid; + struct rb_node node; + int result; + void *buf; + int buf_len; + struct completion completion; + struct ceph_msg *request; /* original request */ + struct ceph_msg *reply; /* and reply */ +}; + +struct ceph_mon_client { + struct ceph_client *client; + struct ceph_monmap *monmap; + + struct mutex mutex; + struct delayed_work delayed_work; + + struct ceph_auth_client *auth; + struct ceph_msg *m_auth, *m_auth_reply, *m_subscribe, *m_subscribe_ack; + int pending_auth; + + bool hunting; + int cur_mon; /* last monitor i contacted */ + unsigned long sub_sent, sub_renew_after; + struct ceph_connection *con; + bool have_fsid; + + /* pending generic requests */ + struct rb_root generic_request_tree; + int num_generic_requests; + u64 last_tid; + + /* mds/osd map */ + int want_mdsmap; + int want_next_osdmap; /* 1 = want, 2 = want+asked */ + u32 have_osdmap, have_mdsmap; + +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs_file; +#endif +}; + +extern struct ceph_monmap *ceph_monmap_decode(void *p, void *end); +extern int ceph_monmap_contains(struct ceph_monmap *m, + struct ceph_entity_addr *addr); + +extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl); +extern void ceph_monc_stop(struct ceph_mon_client *monc); + +/* + * The model here is to indicate that we need a new map of at least + * epoch @want, and also call in when we receive a map.
We will + * periodically rerequest the map from the monitor cluster until we + * get what we want. + */ +extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 have); +extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have); + +extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc); + +extern int ceph_monc_do_statfs(struct ceph_mon_client *monc, + struct ceph_statfs *buf); + +extern int ceph_monc_open_session(struct ceph_mon_client *monc); + +extern int ceph_monc_validate_auth(struct ceph_mon_client *monc); + +extern int ceph_monc_create_snapid(struct ceph_mon_client *monc, + u32 pool, u64 *snapid); + +extern int ceph_monc_delete_snapid(struct ceph_mon_client *monc, + u32 pool, u64 snapid); + +#endif diff --git a/include/linux/ceph/msgpool.h b/include/linux/ceph/msgpool.h new file mode 100644 index 000000000000..a362605f9368 --- /dev/null +++ b/include/linux/ceph/msgpool.h @@ -0,0 +1,25 @@ +#ifndef _FS_CEPH_MSGPOOL +#define _FS_CEPH_MSGPOOL + +#include <linux/mempool.h> +#include "messenger.h" + +/* + * we use memory pools for preallocating messages we may receive, to + * avoid unexpected OOM conditions. + */ +struct ceph_msgpool { + const char *name; + mempool_t *pool; + int front_len; /* preallocated payload size */ +}; + +extern int ceph_msgpool_init(struct ceph_msgpool *pool, + int front_len, int size, bool blocking, + const char *name); +extern void ceph_msgpool_destroy(struct ceph_msgpool *pool); +extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *, + int front_len); +extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *); + +#endif diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h new file mode 100644 index 000000000000..680d3d648cac --- /dev/null +++ b/include/linux/ceph/msgr.h @@ -0,0 +1,175 @@ +#ifndef CEPH_MSGR_H +#define CEPH_MSGR_H + +/* + * Data types for message passing layer used by Ceph. 
+ */ + +#define CEPH_MON_PORT 6789 /* default monitor port */ + +/* + * client-side processes will try to bind to ports in this + * range, simply for the benefit of tools like nmap or wireshark + * that would like to identify the protocol. + */ +#define CEPH_PORT_FIRST 6789 +#define CEPH_PORT_START 6800 /* non-monitors start here */ +#define CEPH_PORT_LAST 6900 + +/* + * tcp connection banner. include a protocol version. and adjust + * whenever the wire protocol changes. try to keep this string length + * constant. + */ +#define CEPH_BANNER "ceph v027" +#define CEPH_BANNER_MAX_LEN 30 + + +/* + * Rollover-safe type and comparator for 32-bit sequence numbers. + * Comparator returns a negative value, 0, or a positive value. + */ +typedef __u32 ceph_seq_t; + +static inline __s32 ceph_seq_cmp(__u32 a, __u32 b) +{ + return (__s32)a - (__s32)b; +} + + +/* + * entity_name -- logical name for a process participating in the + * network, e.g. 'mds0' or 'osd3'. + */ +struct ceph_entity_name { + __u8 type; /* CEPH_ENTITY_TYPE_* */ + __le64 num; +} __attribute__ ((packed)); + +#define CEPH_ENTITY_TYPE_MON 0x01 +#define CEPH_ENTITY_TYPE_MDS 0x02 +#define CEPH_ENTITY_TYPE_OSD 0x04 +#define CEPH_ENTITY_TYPE_CLIENT 0x08 +#define CEPH_ENTITY_TYPE_AUTH 0x20 + +#define CEPH_ENTITY_TYPE_ANY 0xFF + +extern const char *ceph_entity_type_name(int type); + +/* + * entity_addr -- network address + */ +struct ceph_entity_addr { + __le32 type; + __le32 nonce; /* unique id for process (e.g.
pid) */ + struct sockaddr_storage in_addr; +} __attribute__ ((packed)); + +struct ceph_entity_inst { + struct ceph_entity_name name; + struct ceph_entity_addr addr; +} __attribute__ ((packed)); + + +/* used by message exchange protocol */ +#define CEPH_MSGR_TAG_READY 1 /* server->client: ready for messages */ +#define CEPH_MSGR_TAG_RESETSESSION 2 /* server->client: reset, try again */ +#define CEPH_MSGR_TAG_WAIT 3 /* server->client: wait for racing + incoming connection */ +#define CEPH_MSGR_TAG_RETRY_SESSION 4 /* server->client + cseq: try again + with higher cseq */ +#define CEPH_MSGR_TAG_RETRY_GLOBAL 5 /* server->client + gseq: try again + with higher gseq */ +#define CEPH_MSGR_TAG_CLOSE 6 /* closing pipe */ +#define CEPH_MSGR_TAG_MSG 7 /* message */ +#define CEPH_MSGR_TAG_ACK 8 /* message ack */ +#define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */ +#define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ +#define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ +#define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ + + +/* + * connection negotiation + */ +struct ceph_msg_connect { + __le64 features; /* supported feature bits */ + __le32 host_type; /* CEPH_ENTITY_TYPE_* */ + __le32 global_seq; /* count connections initiated by this host */ + __le32 connect_seq; /* count connections initiated in this session */ + __le32 protocol_version; + __le32 authorizer_protocol; + __le32 authorizer_len; + __u8 flags; /* CEPH_MSG_CONNECT_* */ +} __attribute__ ((packed)); + +struct ceph_msg_connect_reply { + __u8 tag; + __le64 features; /* feature bits for this session */ + __le32 global_seq; + __le32 connect_seq; + __le32 protocol_version; + __le32 authorizer_len; + __u8 flags; +} __attribute__ ((packed)); + +#define CEPH_MSG_CONNECT_LOSSY 1 /* messages i send may be safely dropped */ + + +/* + * message header + */ +struct ceph_msg_header_old { + __le64 seq; /* message seq# for this session */ + __le64 tid; /* transaction id */ + __le16 type; /* 
message type */ + __le16 priority; /* priority. higher value == higher priority */ + __le16 version; /* version of message encoding */ + + __le32 front_len; /* bytes in main payload */ + __le32 middle_len;/* bytes in middle payload */ + __le32 data_len; /* bytes of data payload */ + __le16 data_off; /* sender: include full offset; + receiver: mask against ~PAGE_MASK */ + + struct ceph_entity_inst src, orig_src; + __le32 reserved; + __le32 crc; /* header crc32c */ +} __attribute__ ((packed)); + +struct ceph_msg_header { + __le64 seq; /* message seq# for this session */ + __le64 tid; /* transaction id */ + __le16 type; /* message type */ + __le16 priority; /* priority. higher value == higher priority */ + __le16 version; /* version of message encoding */ + + __le32 front_len; /* bytes in main payload */ + __le32 middle_len;/* bytes in middle payload */ + __le32 data_len; /* bytes of data payload */ + __le16 data_off; /* sender: include full offset; + receiver: mask against ~PAGE_MASK */ + + struct ceph_entity_name src; + __le32 reserved; + __le32 crc; /* header crc32c */ +} __attribute__ ((packed)); + +#define CEPH_MSG_PRIO_LOW 64 +#define CEPH_MSG_PRIO_DEFAULT 127 +#define CEPH_MSG_PRIO_HIGH 196 +#define CEPH_MSG_PRIO_HIGHEST 255 + +/* + * follows data payload + */ +struct ceph_msg_footer { + __le32 front_crc, middle_crc, data_crc; + __u8 flags; +} __attribute__ ((packed)); + +#define CEPH_MSG_FOOTER_COMPLETE (1<<0) /* msg wasn't aborted */ +#define CEPH_MSG_FOOTER_NOCRC (1<<1) /* no data crc */ + + +#endif diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h new file mode 100644 index 000000000000..6c91fb032c39 --- /dev/null +++ b/include/linux/ceph/osd_client.h @@ -0,0 +1,234 @@ +#ifndef _FS_CEPH_OSD_CLIENT_H +#define _FS_CEPH_OSD_CLIENT_H + +#include <linux/completion.h> +#include <linux/kref.h> +#include <linux/mempool.h> +#include <linux/rbtree.h> + +#include "types.h" +#include "osdmap.h" +#include "messenger.h" + +struct ceph_msg; 
+struct ceph_snap_context; +struct ceph_osd_request; +struct ceph_osd_client; +struct ceph_authorizer; +struct ceph_pagelist; + +/* + * completion callback for async writepages + */ +typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *, + struct ceph_msg *); + +/* a given osd we're communicating with */ +struct ceph_osd { + atomic_t o_ref; + struct ceph_osd_client *o_osdc; + int o_osd; + int o_incarnation; + struct rb_node o_node; + struct ceph_connection o_con; + struct list_head o_requests; + struct list_head o_osd_lru; + struct ceph_authorizer *o_authorizer; + void *o_authorizer_buf, *o_authorizer_reply_buf; + size_t o_authorizer_buf_len, o_authorizer_reply_buf_len; + unsigned long lru_ttl; + int o_marked_for_keepalive; + struct list_head o_keepalive_item; +}; + +/* an in-flight request */ +struct ceph_osd_request { + u64 r_tid; /* unique for this client */ + struct rb_node r_node; + struct list_head r_req_lru_item; + struct list_head r_osd_item; + struct ceph_osd *r_osd; + struct ceph_pg r_pgid; + int r_pg_osds[CEPH_PG_MAX_SIZE]; + int r_num_pg_osds; + + struct ceph_connection *r_con_filling_msg; + + struct ceph_msg *r_request, *r_reply; + int r_result; + int r_flags; /* any additional flags for the osd */ + u32 r_sent; /* >0 if r_request is sending/sent */ + int r_got_reply; + + struct ceph_osd_client *r_osdc; + struct kref r_kref; + bool r_mempool; + struct completion r_completion, r_safe_completion; + ceph_osdc_callback_t r_callback, r_safe_callback; + struct ceph_eversion r_reassert_version; + struct list_head r_unsafe_item; + + struct inode *r_inode; /* for use by callbacks */ + void *r_priv; /* ditto */ + + char r_oid[40]; /* object name */ + int r_oid_len; + unsigned long r_stamp; /* send OR check time */ + bool r_resend; /* msg send failed, needs retry */ + + struct ceph_file_layout r_file_layout; + struct ceph_snap_context *r_snapc; /* snap context for writes */ + unsigned r_num_pages; /* size of page array (follows) */ + struct page 
**r_pages; /* pages for data payload */ + int r_pages_from_pool; + int r_own_pages; /* if true, i own page list */ +#ifdef CONFIG_BLOCK + struct bio *r_bio; /* instead of pages */ +#endif + + struct ceph_pagelist *r_trail; /* trailing part of the data */ +}; + +struct ceph_osd_client { + struct ceph_client *client; + + struct ceph_osdmap *osdmap; /* current map */ + struct rw_semaphore map_sem; + struct completion map_waiters; + u64 last_requested_map; + + struct mutex request_mutex; + struct rb_root osds; /* osds */ + struct list_head osd_lru; /* idle osds */ + u64 timeout_tid; /* tid of timeout triggering rq */ + u64 last_tid; /* tid of last request */ + struct rb_root requests; /* pending requests */ + struct list_head req_lru; /* pending requests lru */ + int num_requests; + struct delayed_work timeout_work; + struct delayed_work osds_timeout_work; +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs_file; +#endif + + mempool_t *req_mempool; + + struct ceph_msgpool msgpool_op; + struct ceph_msgpool msgpool_op_reply; +}; + +struct ceph_osd_req_op { + u16 op; /* CEPH_OSD_OP_* */ + u32 flags; /* CEPH_OSD_FLAG_* */ + union { + struct { + u64 offset, length; + u64 truncate_size; + u32 truncate_seq; + } extent; + struct { + const char *name; + u32 name_len; + const char *val; + u32 value_len; + __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ + __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ + } xattr; + struct { + const char *class_name; + __u8 class_len; + const char *method_name; + __u8 method_len; + __u8 argc; + const char *indata; + u32 indata_len; + } cls; + struct { + u64 cookie, count; + } pgls; + struct { + u64 snapid; + } snap; + }; + u32 payload_len; +}; + +extern int ceph_osdc_init(struct ceph_osd_client *osdc, + struct ceph_client *client); +extern void ceph_osdc_stop(struct ceph_osd_client *osdc); + +extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, + struct ceph_msg *msg); +extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, + struct 
ceph_msg *msg); + +extern void ceph_calc_raw_layout(struct ceph_osd_client *osdc, + struct ceph_file_layout *layout, + u64 snapid, + u64 off, u64 *plen, u64 *bno, + struct ceph_osd_request *req, + struct ceph_osd_req_op *op); + +extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, + int flags, + struct ceph_snap_context *snapc, + struct ceph_osd_req_op *ops, + bool use_mempool, + gfp_t gfp_flags, + struct page **pages, + struct bio *bio); + +extern void ceph_osdc_build_request(struct ceph_osd_request *req, + u64 off, u64 *plen, + struct ceph_osd_req_op *src_ops, + struct ceph_snap_context *snapc, + struct timespec *mtime, + const char *oid, + int oid_len); + +extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, + struct ceph_file_layout *layout, + struct ceph_vino vino, + u64 offset, u64 *len, int op, int flags, + struct ceph_snap_context *snapc, + int do_sync, u32 truncate_seq, + u64 truncate_size, + struct timespec *mtime, + bool use_mempool, int num_reply); + +static inline void ceph_osdc_get_request(struct ceph_osd_request *req) +{ + kref_get(&req->r_kref); +} +extern void ceph_osdc_release_request(struct kref *kref); +static inline void ceph_osdc_put_request(struct ceph_osd_request *req) +{ + kref_put(&req->r_kref, ceph_osdc_release_request); +} + +extern int ceph_osdc_start_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req, + bool nofail); +extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req); +extern void ceph_osdc_sync(struct ceph_osd_client *osdc); + +extern int ceph_osdc_readpages(struct ceph_osd_client *osdc, + struct ceph_vino vino, + struct ceph_file_layout *layout, + u64 off, u64 *plen, + u32 truncate_seq, u64 truncate_size, + struct page **pages, int nr_pages); + +extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, + struct ceph_vino vino, + struct ceph_file_layout *layout, + struct ceph_snap_context *sc, + u64 off, u64 
len, + u32 truncate_seq, u64 truncate_size, + struct timespec *mtime, + struct page **pages, int nr_pages, + int flags, int do_sync, bool nofail); + +#endif + diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h new file mode 100644 index 000000000000..ba4c205cbb01 --- /dev/null +++ b/include/linux/ceph/osdmap.h @@ -0,0 +1,130 @@ +#ifndef _FS_CEPH_OSDMAP_H +#define _FS_CEPH_OSDMAP_H + +#include <linux/rbtree.h> +#include "types.h" +#include "ceph_fs.h" +#include <linux/crush/crush.h> + +/* + * The osd map describes the current membership of the osd cluster and + * specifies the mapping of objects to placement groups and placement + * groups to (sets of) osds. That is, it completely specifies the + * (desired) distribution of all data objects in the system at some + * point in time. + * + * Each map version is identified by an epoch, which increases monotonically. + * + * The map can be updated either via an incremental map (diff) describing + * the change between two successive epochs, or as a fully encoded map. + */ +struct ceph_pg_pool_info { + struct rb_node node; + int id; + struct ceph_pg_pool v; + int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask; + char *name; +}; + +struct ceph_pg_mapping { + struct rb_node node; + struct ceph_pg pgid; + int len; + int osds[]; +}; + +struct ceph_osdmap { + struct ceph_fsid fsid; + u32 epoch; + u32 mkfs_epoch; + struct ceph_timespec created, modified; + + u32 flags; /* CEPH_OSDMAP_* */ + + u32 max_osd; /* size of osd_state, _offload, _addr arrays */ + u8 *osd_state; /* CEPH_OSD_* */ + u32 *osd_weight; /* 0 = failed, 0x10000 = 100% normal */ + struct ceph_entity_addr *osd_addr; + + struct rb_root pg_temp; + struct rb_root pg_pools; + u32 pool_max; + + /* the CRUSH map specifies the mapping of placement groups to + * the list of osds that store+replicate them. 
*/ + struct crush_map *crush; +}; + +/* + * file layout helpers + */ +#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit)) +#define ceph_file_layout_stripe_count(l) \ + ((__s32)le32_to_cpu((l).fl_stripe_count)) +#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size)) +#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash)) +#define ceph_file_layout_object_su(l) \ + ((__s32)le32_to_cpu((l).fl_object_stripe_unit)) +#define ceph_file_layout_pg_preferred(l) \ + ((__s32)le32_to_cpu((l).fl_pg_preferred)) +#define ceph_file_layout_pg_pool(l) \ + ((__s32)le32_to_cpu((l).fl_pg_pool)) + +static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l) +{ + return le32_to_cpu(l->fl_stripe_unit) * + le32_to_cpu(l->fl_stripe_count); +} + +/* "period" == bytes before i start on a new set of objects */ +static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l) +{ + return le32_to_cpu(l->fl_object_size) * + le32_to_cpu(l->fl_stripe_count); +} + + +static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd) +{ + return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP); +} + +static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag) +{ + return map && (map->flags & flag); +} + +extern char *ceph_osdmap_state_str(char *str, int len, int state); + +static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map, + int osd) +{ + if (osd >= map->max_osd) + return NULL; + return &map->osd_addr[osd]; +} + +extern struct ceph_osdmap *osdmap_decode(void **p, void *end); +extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, + struct ceph_osdmap *map, + struct ceph_messenger *msgr); +extern void ceph_osdmap_destroy(struct ceph_osdmap *map); + +/* calculate mapping of a file extent to an object */ +extern void ceph_calc_file_object_mapping(struct ceph_file_layout *layout, + u64 off, u64 *plen, + u64 *bno, u64 *oxoff, u64 *oxlen); + +/* 
calculate mapping of object to a placement group */ +extern int ceph_calc_object_layout(struct ceph_object_layout *ol, + const char *oid, + struct ceph_file_layout *fl, + struct ceph_osdmap *osdmap); +extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, + int *acting); +extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, + struct ceph_pg pgid); + +extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); + +#endif diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h new file mode 100644 index 000000000000..9660d6b0a35d --- /dev/null +++ b/include/linux/ceph/pagelist.h @@ -0,0 +1,75 @@ +#ifndef __FS_CEPH_PAGELIST_H +#define __FS_CEPH_PAGELIST_H + +#include <linux/list.h> + +struct ceph_pagelist { + struct list_head head; + void *mapped_tail; + size_t length; + size_t room; + struct list_head free_list; + size_t num_pages_free; +}; + +struct ceph_pagelist_cursor { + struct ceph_pagelist *pl; /* pagelist, for error checking */ + struct list_head *page_lru; /* page in list */ + size_t room; /* room remaining to reset to */ +}; + +static inline void ceph_pagelist_init(struct ceph_pagelist *pl) +{ + INIT_LIST_HEAD(&pl->head); + pl->mapped_tail = NULL; + pl->length = 0; + pl->room = 0; + INIT_LIST_HEAD(&pl->free_list); + pl->num_pages_free = 0; +} + +extern int ceph_pagelist_release(struct ceph_pagelist *pl); + +extern int ceph_pagelist_append(struct ceph_pagelist *pl, const void *d, size_t l); + +extern int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space); + +extern int ceph_pagelist_free_reserve(struct ceph_pagelist *pl); + +extern void ceph_pagelist_set_cursor(struct ceph_pagelist *pl, + struct ceph_pagelist_cursor *c); + +extern int ceph_pagelist_truncate(struct ceph_pagelist *pl, + struct ceph_pagelist_cursor *c); + +static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v) +{ + __le64 ev = cpu_to_le64(v); + return ceph_pagelist_append(pl, &ev, sizeof(ev)); +} 
+static inline int ceph_pagelist_encode_32(struct ceph_pagelist *pl, u32 v) +{ + __le32 ev = cpu_to_le32(v); + return ceph_pagelist_append(pl, &ev, sizeof(ev)); +} +static inline int ceph_pagelist_encode_16(struct ceph_pagelist *pl, u16 v) +{ + __le16 ev = cpu_to_le16(v); + return ceph_pagelist_append(pl, &ev, sizeof(ev)); +} +static inline int ceph_pagelist_encode_8(struct ceph_pagelist *pl, u8 v) +{ + return ceph_pagelist_append(pl, &v, 1); +} +static inline int ceph_pagelist_encode_string(struct ceph_pagelist *pl, + char *s, size_t len) +{ + int ret = ceph_pagelist_encode_32(pl, len); + if (ret) + return ret; + if (len) + return ceph_pagelist_append(pl, s, len); + return 0; +} + +#endif diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h new file mode 100644 index 000000000000..6d5247f2e81b --- /dev/null +++ b/include/linux/ceph/rados.h @@ -0,0 +1,405 @@ +#ifndef CEPH_RADOS_H +#define CEPH_RADOS_H + +/* + * Data types for the Ceph distributed object storage layer RADOS + * (Reliable Autonomic Distributed Object Store). + */ + +#include "msgr.h" + +/* + * osdmap encoding versions + */ +#define CEPH_OSDMAP_INC_VERSION 5 +#define CEPH_OSDMAP_INC_VERSION_EXT 5 +#define CEPH_OSDMAP_VERSION 5 +#define CEPH_OSDMAP_VERSION_EXT 5 + +/* + * fs id + */ +struct ceph_fsid { + unsigned char fsid[16]; +}; + +static inline int ceph_fsid_compare(const struct ceph_fsid *a, + const struct ceph_fsid *b) +{ + return memcmp(a, b, sizeof(*a)); +} + +/* + * ino, object, etc. 
+ */ +typedef __le64 ceph_snapid_t; +#define CEPH_SNAPDIR ((__u64)(-1)) /* reserved for hidden .snap dir */ +#define CEPH_NOSNAP ((__u64)(-2)) /* "head", "live" revision */ +#define CEPH_MAXSNAP ((__u64)(-3)) /* largest valid snapid */ + +struct ceph_timespec { + __le32 tv_sec; + __le32 tv_nsec; +} __attribute__ ((packed)); + + +/* + * object layout - how objects are mapped into PGs + */ +#define CEPH_OBJECT_LAYOUT_HASH 1 +#define CEPH_OBJECT_LAYOUT_LINEAR 2 +#define CEPH_OBJECT_LAYOUT_HASHINO 3 + +/* + * pg layout -- how PGs are mapped onto (sets of) OSDs + */ +#define CEPH_PG_LAYOUT_CRUSH 0 +#define CEPH_PG_LAYOUT_HASH 1 +#define CEPH_PG_LAYOUT_LINEAR 2 +#define CEPH_PG_LAYOUT_HYBRID 3 + +#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */ + +/* + * placement group. + * we encode this into one __le64. + */ +struct ceph_pg { + __le16 preferred; /* preferred primary osd */ + __le16 ps; /* placement seed */ + __le32 pool; /* object pool */ +} __attribute__ ((packed)); + +/* + * pg_pool is a set of pgs storing a pool of objects + * + * pg_num -- base number of pseudorandomly placed pgs + * + * pgp_num -- effective number when calculating pg placement. this + * is used for pg_num increases. new pgs result in data being "split" + * into new pgs. for this to proceed smoothly, new pgs are intiially + * colocated with their parents; that is, pgp_num doesn't increase + * until the new pgs have successfully split. only _then_ are the new + * pgs placed independently. + * + * lpg_num -- localized pg count (per device). replicas are randomly + * selected. + * + * lpgp_num -- as above. 
+ */ +#define CEPH_PG_TYPE_REP 1 +#define CEPH_PG_TYPE_RAID4 2 +#define CEPH_PG_POOL_VERSION 2 +struct ceph_pg_pool { + __u8 type; /* CEPH_PG_TYPE_* */ + __u8 size; /* number of osds in each pg */ + __u8 crush_ruleset; /* crush placement rule */ + __u8 object_hash; /* hash mapping object name to ps */ + __le32 pg_num, pgp_num; /* number of pg's */ + __le32 lpg_num, lpgp_num; /* number of localized pg's */ + __le32 last_change; /* most recent epoch changed */ + __le64 snap_seq; /* seq for per-pool snapshot */ + __le32 snap_epoch; /* epoch of last snap */ + __le32 num_snaps; + __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */ + __le64 auid; /* who owns the pg */ +} __attribute__ ((packed)); + +/* + * stable_mod func is used to control number of placement groups. + * similar to straight-up modulo, but produces a stable mapping as b + * increases over time. b is the number of bins, and bmask is the + * containing power of 2 minus 1. + * + * b <= bmask and bmask=(2**n)-1 + * e.g., b=12 -> bmask=15, b=123 -> bmask=127 + */ +static inline int ceph_stable_mod(int x, int b, int bmask) +{ + if ((x & bmask) < b) + return x & bmask; + else + return x & (bmask >> 1); +} + +/* + * object layout - how a given object should be stored. + */ +struct ceph_object_layout { + struct ceph_pg ol_pgid; /* raw pg, with _full_ ps precision. */ + __le32 ol_stripe_unit; /* for per-object parity, if any */ +} __attribute__ ((packed)); + +/* + * compound epoch+version, used by storage layer to serialize mutations + */ +struct ceph_eversion { + __le32 epoch; + __le64 version; +} __attribute__ ((packed)); + +/* + * osd map bits + */ + +/* status bits */ +#define CEPH_OSD_EXISTS 1 +#define CEPH_OSD_UP 2 + +/* osd weights. 
fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ +#define CEPH_OSD_IN 0x10000 +#define CEPH_OSD_OUT 0 + + +/* + * osd map flag bits + */ +#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */ +#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */ +#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ +#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ +#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ + +/* + * osd ops + */ +#define CEPH_OSD_OP_MODE 0xf000 +#define CEPH_OSD_OP_MODE_RD 0x1000 +#define CEPH_OSD_OP_MODE_WR 0x2000 +#define CEPH_OSD_OP_MODE_RMW 0x3000 +#define CEPH_OSD_OP_MODE_SUB 0x4000 + +#define CEPH_OSD_OP_TYPE 0x0f00 +#define CEPH_OSD_OP_TYPE_LOCK 0x0100 +#define CEPH_OSD_OP_TYPE_DATA 0x0200 +#define CEPH_OSD_OP_TYPE_ATTR 0x0300 +#define CEPH_OSD_OP_TYPE_EXEC 0x0400 +#define CEPH_OSD_OP_TYPE_PG 0x0500 + +enum { + /** data **/ + /* read */ + CEPH_OSD_OP_READ = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 1, + CEPH_OSD_OP_STAT = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 2, + + /* fancy read */ + CEPH_OSD_OP_MASKTRUNC = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 4, + + /* write */ + CEPH_OSD_OP_WRITE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 1, + CEPH_OSD_OP_WRITEFULL = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 2, + CEPH_OSD_OP_TRUNCATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 3, + CEPH_OSD_OP_ZERO = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 4, + CEPH_OSD_OP_DELETE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 5, + + /* fancy write */ + CEPH_OSD_OP_APPEND = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 6, + CEPH_OSD_OP_STARTSYNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 7, + CEPH_OSD_OP_SETTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 8, + CEPH_OSD_OP_TRIMTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 9, + + CEPH_OSD_OP_TMAPUP = CEPH_OSD_OP_MODE_RMW | CEPH_OSD_OP_TYPE_DATA | 10, + CEPH_OSD_OP_TMAPPUT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 11, + 
CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12, + + CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13, + CEPH_OSD_OP_ROLLBACK= CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 14, + + /** attrs **/ + /* read */ + CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, + CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2, + CEPH_OSD_OP_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 3, + + /* write */ + CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1, + CEPH_OSD_OP_SETXATTRS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 2, + CEPH_OSD_OP_RESETXATTRS = CEPH_OSD_OP_MODE_WR|CEPH_OSD_OP_TYPE_ATTR | 3, + CEPH_OSD_OP_RMXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 4, + + /** subop **/ + CEPH_OSD_OP_PULL = CEPH_OSD_OP_MODE_SUB | 1, + CEPH_OSD_OP_PUSH = CEPH_OSD_OP_MODE_SUB | 2, + CEPH_OSD_OP_BALANCEREADS = CEPH_OSD_OP_MODE_SUB | 3, + CEPH_OSD_OP_UNBALANCEREADS = CEPH_OSD_OP_MODE_SUB | 4, + CEPH_OSD_OP_SCRUB = CEPH_OSD_OP_MODE_SUB | 5, + + /** lock **/ + CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1, + CEPH_OSD_OP_WRUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 2, + CEPH_OSD_OP_RDLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 3, + CEPH_OSD_OP_RDUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 4, + CEPH_OSD_OP_UPLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 5, + CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6, + + /** exec **/ + CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1, + + /** pg **/ + CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1, +}; + +static inline int ceph_osd_op_type_lock(int op) +{ + return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_LOCK; +} +static inline int ceph_osd_op_type_data(int op) +{ + return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_DATA; +} +static inline int ceph_osd_op_type_attr(int op) +{ + return (op & 
CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_ATTR; +} +static inline int ceph_osd_op_type_exec(int op) +{ + return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_EXEC; +} +static inline int ceph_osd_op_type_pg(int op) +{ + return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG; +} + +static inline int ceph_osd_op_mode_subop(int op) +{ + return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_SUB; +} +static inline int ceph_osd_op_mode_read(int op) +{ + return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD; +} +static inline int ceph_osd_op_mode_modify(int op) +{ + return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; +} + +/* + * note that the following tmap stuff is also defined in the ceph librados.h + * any modification here needs to be updated there + */ +#define CEPH_OSD_TMAP_HDR 'h' +#define CEPH_OSD_TMAP_SET 's' +#define CEPH_OSD_TMAP_RM 'r' + +extern const char *ceph_osd_op_name(int op); + + +/* + * osd op flags + * + * An op may be READ, WRITE, or READ|WRITE. + */ +enum { + CEPH_OSD_FLAG_ACK = 1, /* want (or is) "ack" ack */ + CEPH_OSD_FLAG_ONNVRAM = 2, /* want (or is) "onnvram" ack */ + CEPH_OSD_FLAG_ONDISK = 4, /* want (or is) "ondisk" ack */ + CEPH_OSD_FLAG_RETRY = 8, /* resend attempt */ + CEPH_OSD_FLAG_READ = 16, /* op may read */ + CEPH_OSD_FLAG_WRITE = 32, /* op may write */ + CEPH_OSD_FLAG_ORDERSNAP = 64, /* EOLDSNAP if snapc is out of order */ + CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */ + CEPH_OSD_FLAG_BALANCE_READS = 256, + CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ + CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ + CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ + CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */ +}; + +enum { + CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ +}; + +#define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ +#define EBLACKLISTED ESHUTDOWN /* blacklisted */ + +/* xattr comparison */ +enum { + CEPH_OSD_CMPXATTR_OP_NOP = 0, + CEPH_OSD_CMPXATTR_OP_EQ = 1, + 
CEPH_OSD_CMPXATTR_OP_NE = 2, + CEPH_OSD_CMPXATTR_OP_GT = 3, + CEPH_OSD_CMPXATTR_OP_GTE = 4, + CEPH_OSD_CMPXATTR_OP_LT = 5, + CEPH_OSD_CMPXATTR_OP_LTE = 6 +}; + +enum { + CEPH_OSD_CMPXATTR_MODE_STRING = 1, + CEPH_OSD_CMPXATTR_MODE_U64 = 2 +}; + +/* + * an individual object operation. each may be accompanied by some data + * payload + */ +struct ceph_osd_op { + __le16 op; /* CEPH_OSD_OP_* */ + __le32 flags; /* CEPH_OSD_FLAG_* */ + union { + struct { + __le64 offset, length; + __le64 truncate_size; + __le32 truncate_seq; + } __attribute__ ((packed)) extent; + struct { + __le32 name_len; + __le32 value_len; + __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ + __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ + } __attribute__ ((packed)) xattr; + struct { + __u8 class_len; + __u8 method_len; + __u8 argc; + __le32 indata_len; + } __attribute__ ((packed)) cls; + struct { + __le64 cookie, count; + } __attribute__ ((packed)) pgls; + struct { + __le64 snapid; + } __attribute__ ((packed)) snap; + }; + __le32 payload_len; +} __attribute__ ((packed)); + +/* + * osd request message header. each request may include multiple + * ceph_osd_op object operations. 
+ */ +struct ceph_osd_request_head { + __le32 client_inc; /* client incarnation */ + struct ceph_object_layout layout; /* pgid */ + __le32 osdmap_epoch; /* client's osdmap epoch */ + + __le32 flags; + + struct ceph_timespec mtime; /* for mutations only */ + struct ceph_eversion reassert_version; /* if we are replaying op */ + + __le32 object_len; /* length of object name */ + + __le64 snapid; /* snapid to read */ + __le64 snap_seq; /* writer's snap context */ + __le32 num_snaps; + + __le16 num_ops; + struct ceph_osd_op ops[]; /* followed by ops[], obj, ticket, snaps */ +} __attribute__ ((packed)); + +struct ceph_osd_reply_head { + __le32 client_inc; /* client incarnation */ + __le32 flags; + struct ceph_object_layout layout; + __le32 osdmap_epoch; + struct ceph_eversion reassert_version; /* for replaying uncommitted */ + + __le32 result; /* result code */ + + __le32 object_len; /* length of object name */ + __le32 num_ops; + struct ceph_osd_op ops[0]; /* ops[], object */ +} __attribute__ ((packed)); + + +#endif diff --git a/include/linux/ceph/types.h b/include/linux/ceph/types.h new file mode 100644 index 000000000000..28b35a005ec2 --- /dev/null +++ b/include/linux/ceph/types.h @@ -0,0 +1,29 @@ +#ifndef _FS_CEPH_TYPES_H +#define _FS_CEPH_TYPES_H + +/* needed before including ceph_fs.h */ +#include <linux/in.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/string.h> + +#include "ceph_fs.h" +#include "ceph_frag.h" +#include "ceph_hash.h" + +/* + * Identify inodes by both their ino AND snapshot id (a u64). 
+ */ +struct ceph_vino { + u64 ino; + u64 snap; +}; + + +/* context for the caps reservation mechanism */ +struct ceph_cap_reservation { + int count; +}; + + +#endif diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 0c991023ee47..709dfb901d11 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -75,7 +75,7 @@ struct cgroup_subsys_state { unsigned long flags; /* ID for this css, if possible */ - struct css_id *id; + struct css_id __rcu *id; }; /* bits in struct cgroup_subsys_state flags field */ @@ -205,7 +205,7 @@ struct cgroup { struct list_head children; /* my children */ struct cgroup *parent; /* my parent */ - struct dentry *dentry; /* cgroup fs entry, RCU protected */ + struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */ /* Private pointers for each registered subsystem */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c1a62c56a660..320d6c94ff84 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -16,7 +16,11 @@ # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) # define __percpu __attribute__((noderef, address_space(3))) +#ifdef CONFIG_SPARSE_RCU_POINTER +# define __rcu __attribute__((noderef, address_space(4))) +#else # define __rcu +#endif extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); #else diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 8ba66a9d9022..ba4b85a6d9b8 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -9,37 +9,7 @@ * These are the only things you should do on a core-file: use only these * functions to write out all the necessary info. 
*/ -static inline int dump_write(struct file *file, const void *addr, int nr) -{ - return file->f_op->write(file, addr, nr, &file->f_pos) == nr; -} - -static inline int dump_seek(struct file *file, loff_t off) -{ - int ret = 1; - - if (file->f_op->llseek && file->f_op->llseek != no_llseek) { - if (file->f_op->llseek(file, off, SEEK_CUR) < 0) - return 0; - } else { - char *buf = (char *)get_zeroed_page(GFP_KERNEL); - - if (!buf) - return 0; - while (off > 0) { - unsigned long n = off; - - if (n > PAGE_SIZE) - n = PAGE_SIZE; - if (!dump_write(file, buf, n)) { - ret = 0; - break; - } - off -= n; - } - free_page((unsigned long)buf); - } - return ret; -} +extern int dump_write(struct file *file, const void *addr, int nr); +extern int dump_seek(struct file *file, loff_t off); #endif /* _LINUX_COREDUMP_H */ diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 36ca9721a0c2..1be416bbbb82 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -53,6 +53,7 @@ struct cpuidle_state { #define CPUIDLE_FLAG_BALANCED (0x40) /* medium latency, moderate savings */ #define CPUIDLE_FLAG_DEEP (0x80) /* high latency, large savings */ #define CPUIDLE_FLAG_IGNORE (0x100) /* ignore during this idle period */ +#define CPUIDLE_FLAG_TLB_FLUSHED (0x200) /* tlb will be flushed */ #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000) diff --git a/include/linux/cred.h b/include/linux/cred.h index 4d2c39573f36..4aaeab376446 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -84,7 +84,7 @@ struct thread_group_cred { atomic_t usage; pid_t tgid; /* thread group process ID */ spinlock_t lock; - struct key *session_keyring; /* keyring inherited over fork */ + struct key __rcu *session_keyring; /* keyring inherited over fork */ struct key *process_keyring; /* keyring private to this process */ struct rcu_head rcu; /* RCU deletion hook */ }; diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h new file mode 100644 index 000000000000..97e435b191f4 --- 
/dev/null +++ b/include/linux/crush/crush.h @@ -0,0 +1,180 @@ +#ifndef CEPH_CRUSH_CRUSH_H +#define CEPH_CRUSH_CRUSH_H + +#include <linux/types.h> + +/* + * CRUSH is a pseudo-random data distribution algorithm that + * efficiently distributes input values (typically, data objects) + * across a heterogeneous, structured storage cluster. + * + * The algorithm was originally described in detail in this paper + * (although the algorithm has evolved somewhat since then): + * + * http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf + * + * LGPL2 + */ + + +#define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */ + + +#define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */ +#define CRUSH_MAX_SET 10 /* max size of a mapping result */ + + +/* + * CRUSH uses user-defined "rules" to describe how inputs should be + * mapped to devices. A rule consists of sequence of steps to perform + * to generate the set of output devices. + */ +struct crush_rule_step { + __u32 op; + __s32 arg1; + __s32 arg2; +}; + +/* step op codes */ +enum { + CRUSH_RULE_NOOP = 0, + CRUSH_RULE_TAKE = 1, /* arg1 = value to start with */ + CRUSH_RULE_CHOOSE_FIRSTN = 2, /* arg1 = num items to pick */ + /* arg2 = type */ + CRUSH_RULE_CHOOSE_INDEP = 3, /* same */ + CRUSH_RULE_EMIT = 4, /* no args */ + CRUSH_RULE_CHOOSE_LEAF_FIRSTN = 6, + CRUSH_RULE_CHOOSE_LEAF_INDEP = 7, +}; + +/* + * for specifying choose num (arg1) relative to the max parameter + * passed to do_rule + */ +#define CRUSH_CHOOSE_N 0 +#define CRUSH_CHOOSE_N_MINUS(x) (-(x)) + +/* + * The rule mask is used to describe what the rule is intended for. + * Given a ruleset and size of output set, we search through the + * rule list for a matching rule_mask. 
+ */ +struct crush_rule_mask { + __u8 ruleset; + __u8 type; + __u8 min_size; + __u8 max_size; +}; + +struct crush_rule { + __u32 len; + struct crush_rule_mask mask; + struct crush_rule_step steps[0]; +}; + +#define crush_rule_size(len) (sizeof(struct crush_rule) + \ + (len)*sizeof(struct crush_rule_step)) + + + +/* + * A bucket is a named container of other items (either devices or + * other buckets). Items within a bucket are chosen using one of a + * few different algorithms. The table summarizes how the speed of + * each option measures up against mapping stability when items are + * added or removed. + * + * Bucket Alg Speed Additions Removals + * ------------------------------------------------ + * uniform O(1) poor poor + * list O(n) optimal poor + * tree O(log n) good good + * straw O(n) optimal optimal + */ +enum { + CRUSH_BUCKET_UNIFORM = 1, + CRUSH_BUCKET_LIST = 2, + CRUSH_BUCKET_TREE = 3, + CRUSH_BUCKET_STRAW = 4 +}; +extern const char *crush_bucket_alg_name(int alg); + +struct crush_bucket { + __s32 id; /* this'll be negative */ + __u16 type; /* non-zero; type=0 is reserved for devices */ + __u8 alg; /* one of CRUSH_BUCKET_* */ + __u8 hash; /* which hash function to use, CRUSH_HASH_* */ + __u32 weight; /* 16-bit fixed point */ + __u32 size; /* num items */ + __s32 *items; + + /* + * cached random permutation: used for uniform bucket and for + * the linear search fallback for the other bucket types. + */ + __u32 perm_x; /* @x for which *perm is defined */ + __u32 perm_n; /* num elements of *perm that are permuted/defined */ + __u32 *perm; +}; + +struct crush_bucket_uniform { + struct crush_bucket h; + __u32 item_weight; /* 16-bit fixed point; all items equally weighted */ +}; + +struct crush_bucket_list { + struct crush_bucket h; + __u32 *item_weights; /* 16-bit fixed point */ + __u32 *sum_weights; /* 16-bit fixed point. 
element i is sum + of weights 0..i, inclusive */ +}; + +struct crush_bucket_tree { + struct crush_bucket h; /* note: h.size is _tree_ size, not number of + actual items */ + __u8 num_nodes; + __u32 *node_weights; +}; + +struct crush_bucket_straw { + struct crush_bucket h; + __u32 *item_weights; /* 16-bit fixed point */ + __u32 *straws; /* 16-bit fixed point */ +}; + + + +/* + * CRUSH map includes all buckets, rules, etc. + */ +struct crush_map { + struct crush_bucket **buckets; + struct crush_rule **rules; + + /* + * Parent pointers to identify the parent bucket a device or + * bucket in the hierarchy. If an item appears more than + * once, this is the _last_ time it appeared (where buckets + * are processed in bucket id order, from -1 on down to + * -max_buckets. + */ + __u32 *bucket_parents; + __u32 *device_parents; + + __s32 max_buckets; + __u32 max_rules; + __s32 max_devices; +}; + + +/* crush.c */ +extern int crush_get_bucket_item_weight(struct crush_bucket *b, int pos); +extern void crush_calc_parents(struct crush_map *map); +extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b); +extern void crush_destroy_bucket_list(struct crush_bucket_list *b); +extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b); +extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b); +extern void crush_destroy_bucket(struct crush_bucket *b); +extern void crush_destroy(struct crush_map *map); + +#endif diff --git a/include/linux/crush/hash.h b/include/linux/crush/hash.h new file mode 100644 index 000000000000..91e884230d5d --- /dev/null +++ b/include/linux/crush/hash.h @@ -0,0 +1,17 @@ +#ifndef CEPH_CRUSH_HASH_H +#define CEPH_CRUSH_HASH_H + +#define CRUSH_HASH_RJENKINS1 0 + +#define CRUSH_HASH_DEFAULT CRUSH_HASH_RJENKINS1 + +extern const char *crush_hash_name(int type); + +extern __u32 crush_hash32(int type, __u32 a); +extern __u32 crush_hash32_2(int type, __u32 a, __u32 b); +extern __u32 crush_hash32_3(int type, __u32 a, __u32 b, __u32 c); 
+extern __u32 crush_hash32_4(int type, __u32 a, __u32 b, __u32 c, __u32 d); +extern __u32 crush_hash32_5(int type, __u32 a, __u32 b, __u32 c, __u32 d, + __u32 e); + +#endif diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h new file mode 100644 index 000000000000..c46b99c18bb0 --- /dev/null +++ b/include/linux/crush/mapper.h @@ -0,0 +1,20 @@ +#ifndef CEPH_CRUSH_MAPPER_H +#define CEPH_CRUSH_MAPPER_H + +/* + * CRUSH functions for find rules and then mapping an input to an + * output set. + * + * LGPL2 + */ + +#include "crush.h" + +extern int crush_find_rule(struct crush_map *map, int pool, int type, int size); +extern int crush_do_rule(struct crush_map *map, + int ruleno, + int x, int *result, int result_max, + int forcefeed, /* -1 for none */ + __u32 *weights); + +#endif diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 7434a8353e23..7187bd8a75f6 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -165,8 +165,10 @@ enum { DCCPO_TIMESTAMP_ECHO = 42, DCCPO_ELAPSED_TIME = 43, DCCPO_MAX = 45, - DCCPO_MIN_CCID_SPECIFIC = 128, - DCCPO_MAX_CCID_SPECIFIC = 255, + DCCPO_MIN_RX_CCID_SPECIFIC = 128, /* from sender to receiver */ + DCCPO_MAX_RX_CCID_SPECIFIC = 191, + DCCPO_MIN_TX_CCID_SPECIFIC = 192, /* from receiver to sender */ + DCCPO_MAX_TX_CCID_SPECIFIC = 255, }; /* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */ #define DCCP_SINGLE_OPT_MAXLEN 253 diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 29b3ce3f2a1d..2833452ea01c 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -49,7 +49,6 @@ struct task_struct; #ifdef CONFIG_LOCKDEP extern void debug_show_all_locks(void); -extern void __debug_show_held_locks(struct task_struct *task); extern void debug_show_held_locks(struct task_struct *task); extern void debug_check_no_locks_freed(const void *from, unsigned long len); extern void debug_check_no_locks_held(struct task_struct *task); @@ -58,10 +57,6 @@ 
static inline void debug_show_all_locks(void) { } -static inline void __debug_show_held_locks(struct task_struct *task) -{ -} - static inline void debug_show_held_locks(struct task_struct *task) { } diff --git a/include/linux/device.h b/include/linux/device.h index 516fecacf27b..dd4895313468 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -751,4 +751,11 @@ do { \ MODULE_ALIAS("char-major-" __stringify(major) "-" __stringify(minor)) #define MODULE_ALIAS_CHARDEV_MAJOR(major) \ MODULE_ALIAS("char-major-" __stringify(major) "-*") + +#ifdef CONFIG_SYSFS_DEPRECATED +extern long sysfs_deprecated; +#else +#define sysfs_deprecated 0 +#endif + #endif /* _DEVICE_H_ */ diff --git a/include/linux/dlm.h b/include/linux/dlm.h index 0b3518c42356..d4e02f5353a0 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -48,10 +48,10 @@ typedef void dlm_lockspace_t; * * 0 if lock request was successful * -EAGAIN if request would block and is flagged DLM_LKF_NOQUEUE - * -ENOMEM if there is no memory to process request - * -EINVAL if there are invalid parameters * -DLM_EUNLOCK if unlock request was successful * -DLM_ECANCEL if a cancel completed successfully + * -EDEADLK if a deadlock was detected + * -ETIMEDOUT if the lock request was canceled due to a timeout */ #define DLM_SBF_DEMOTED 0x01 diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c61d4ca27bcc..e2106495cc11 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -548,7 +548,7 @@ static inline bool dma_dev_has_pq_continue(struct dma_device *dma) return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE; } -static unsigned short dma_dev_to_maxpq(struct dma_device *dma) +static inline unsigned short dma_dev_to_maxpq(struct dma_device *dma) { return dma->max_pq & ~DMA_HAS_PQ_CONTINUE; } diff --git a/include/linux/dmar.h b/include/linux/dmar.h index d7cecc90ed34..a7d9dc21391d 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -57,15 +57,15 @@ 
extern int dmar_table_init(void); extern int dmar_dev_scope_init(void); /* Intel IOMMU detection */ -extern void detect_intel_iommu(void); +extern int detect_intel_iommu(void); extern int enable_drhd_fault_handling(void); extern int parse_ioapics_under_ir(void); extern int alloc_iommu(struct dmar_drhd_unit *); #else -static inline void detect_intel_iommu(void) +static inline int detect_intel_iommu(void) { - return; + return -ENODEV; } static inline int dmar_table_init(void) @@ -106,6 +106,7 @@ struct irte { __u64 high; }; }; + #ifdef CONFIG_INTR_REMAP extern int intr_remapping_enabled; extern int intr_remapping_supported(void); @@ -119,11 +120,8 @@ extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count); extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 sub_handle); extern int map_irq_to_irte_handle(int irq, u16 *sub_handle); -extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index); -extern int flush_irte(int irq); extern int free_irte(int irq); -extern int irq_remapped(int irq); extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); extern struct intel_iommu *map_ioapic_to_ir(int apic); extern struct intel_iommu *map_hpet_to_ir(u8 id); @@ -177,7 +175,6 @@ static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev) return 0; } -#define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) #define disable_intr_remapping() (0) #define reenable_intr_remapping(mode) (0) @@ -187,8 +184,9 @@ static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev) /* Can't use the common MSI interrupt functions * since DMAR is not a pci device */ -extern void dmar_msi_unmask(unsigned int irq); -extern void dmar_msi_mask(unsigned int irq); +struct irq_data; +extern void dmar_msi_unmask(struct irq_data *data); +extern void dmar_msi_mask(struct irq_data *data); extern void dmar_msi_read(int irq, struct msi_msg *msg); extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int 
dmar_set_interrupt(struct intel_iommu *iommu); diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 479ee3a1d901..9b2a0158f399 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,10 +53,10 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.8.1" +#define REL_VERSION "8.3.9rc2" #define API_VERSION 88 #define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 94 +#define PRO_VERSION_MAX 95 enum drbd_io_error_p { @@ -91,6 +91,11 @@ enum drbd_after_sb_p { ASB_VIOLENTLY }; +enum drbd_on_no_data { + OND_IO_ERROR, + OND_SUSPEND_IO +}; + /* KEEP the order, do not delete or insert. Only append. */ enum drbd_ret_codes { ERR_CODE_BASE = 100, @@ -140,6 +145,7 @@ enum drbd_ret_codes { ERR_CONNECTED = 151, /* DRBD 8.3 only */ ERR_PERM = 152, ERR_NEED_APV_93 = 153, + ERR_STONITH_AND_PROT_A = 154, /* insert new ones above this line */ AFTER_LAST_ERR_CODE @@ -226,13 +232,17 @@ union drbd_state { unsigned conn:5 ; /* 17/32 cstates */ unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ - unsigned susp:1 ; /* 2/2 IO suspended no/yes */ + unsigned susp:1 ; /* 2/2 IO suspended no/yes (by user) */ unsigned aftr_isp:1 ; /* isp .. imposed sync pause */ unsigned peer_isp:1 ; unsigned user_isp:1 ; - unsigned _pad:11; /* 0 unused */ + unsigned susp_nod:1 ; /* IO suspended because no data */ + unsigned susp_fen:1 ; /* IO suspended because fence peer handler runs*/ + unsigned _pad:9; /* 0 unused */ #elif defined(__BIG_ENDIAN_BITFIELD) - unsigned _pad:11; /* 0 unused */ + unsigned _pad:9; + unsigned susp_fen:1 ; + unsigned susp_nod:1 ; unsigned user_isp:1 ; unsigned peer_isp:1 ; unsigned aftr_isp:1 ; /* isp .. 
imposed sync pause */ @@ -312,6 +322,8 @@ enum drbd_timeout_flag { #define DRBD_MAGIC 0x83740267 #define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC) +#define DRBD_MAGIC_BIG 0x835a +#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG) /* these are of type "int" */ #define DRBD_MD_INDEX_INTERNAL -1 diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 440b42e38e89..4ac33f34b77e 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -128,26 +128,31 @@ #define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT #define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT +#define DRBD_ON_NO_DATA_DEF OND_IO_ERROR #define DRBD_MAX_BIO_BVECS_MIN 0 #define DRBD_MAX_BIO_BVECS_MAX 128 #define DRBD_MAX_BIO_BVECS_DEF 0 -#define DRBD_DP_VOLUME_MIN 4 -#define DRBD_DP_VOLUME_MAX 1048576 -#define DRBD_DP_VOLUME_DEF 16384 +#define DRBD_C_PLAN_AHEAD_MIN 0 +#define DRBD_C_PLAN_AHEAD_MAX 300 +#define DRBD_C_PLAN_AHEAD_DEF 0 /* RS rate controller disabled by default */ -#define DRBD_DP_INTERVAL_MIN 1 -#define DRBD_DP_INTERVAL_MAX 600 -#define DRBD_DP_INTERVAL_DEF 5 +#define DRBD_C_DELAY_TARGET_MIN 1 +#define DRBD_C_DELAY_TARGET_MAX 100 +#define DRBD_C_DELAY_TARGET_DEF 10 -#define DRBD_RS_THROTTLE_TH_MIN 1 -#define DRBD_RS_THROTTLE_TH_MAX 600 -#define DRBD_RS_THROTTLE_TH_DEF 20 +#define DRBD_C_FILL_TARGET_MIN 0 +#define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */ +#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */ -#define DRBD_RS_HOLD_OFF_TH_MIN 1 -#define DRBD_RS_HOLD_OFF_TH_MAX 6000 -#define DRBD_RS_HOLD_OFF_TH_DEF 100 +#define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */ +#define DRBD_C_MAX_RATE_MAX (4 << 20) +#define DRBD_C_MAX_RATE_DEF 102400 + +#define DRBD_C_MIN_RATE_MIN 0 /* kByte/sec */ +#define DRBD_C_MIN_RATE_MAX (4 << 20) +#define DRBD_C_MIN_RATE_DEF 4096 #undef RANGE #endif diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index 
5f042810a56c..ade91107c9a5 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -87,6 +87,12 @@ NL_PACKET(syncer_conf, 8, NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32) NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX) NL_BIT( 65, T_MAY_IGNORE, use_rle) + NL_INTEGER( 75, T_MAY_IGNORE, on_no_data) + NL_INTEGER( 76, T_MAY_IGNORE, c_plan_ahead) + NL_INTEGER( 77, T_MAY_IGNORE, c_delay_target) + NL_INTEGER( 78, T_MAY_IGNORE, c_fill_target) + NL_INTEGER( 79, T_MAY_IGNORE, c_max_rate) + NL_INTEGER( 80, T_MAY_IGNORE, c_min_rate) ) NL_PACKET(invalidate, 9, ) diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 52c0da4bdd18..a90b3892074a 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -1,6 +1,8 @@ #ifndef _DYNAMIC_DEBUG_H #define _DYNAMIC_DEBUG_H +#include <linux/jump_label.h> + /* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They * use independent hash functions, to reduce the chance of false positives. @@ -22,8 +24,6 @@ struct _ddebug { const char *function; const char *filename; const char *format; - char primary_hash; - char secondary_hash; unsigned int lineno:24; /* * The flags field controls the behaviour at the callsite. @@ -33,6 +33,7 @@ struct _ddebug { #define _DPRINTK_FLAGS_PRINT (1<<0) /* printk() a message using the format */ #define _DPRINTK_FLAGS_DEFAULT 0 unsigned int flags:8; + char enabled; } __attribute__((aligned(8))); @@ -42,33 +43,35 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, #if defined(CONFIG_DYNAMIC_DEBUG) extern int ddebug_remove_module(const char *mod_name); -#define __dynamic_dbg_enabled(dd) ({ \ - int __ret = 0; \ - if (unlikely((dynamic_debug_enabled & (1LL << DEBUG_HASH)) && \ - (dynamic_debug_enabled2 & (1LL << DEBUG_HASH2)))) \ - if (unlikely(dd.flags)) \ - __ret = 1; \ - __ret; }) - #define dynamic_pr_debug(fmt, ...) 
do { \ + __label__ do_printk; \ + __label__ out; \ static struct _ddebug descriptor \ __used \ __attribute__((section("__verbose"), aligned(8))) = \ - { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ - DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ - if (__dynamic_dbg_enabled(descriptor)) \ - printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ + { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \ + _DPRINTK_FLAGS_DEFAULT }; \ + JUMP_LABEL(&descriptor.enabled, do_printk); \ + goto out; \ +do_printk: \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ +out: ; \ } while (0) #define dynamic_dev_dbg(dev, fmt, ...) do { \ + __label__ do_printk; \ + __label__ out; \ static struct _ddebug descriptor \ __used \ __attribute__((section("__verbose"), aligned(8))) = \ - { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ - DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ - if (__dynamic_dbg_enabled(descriptor)) \ - dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ + { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \ + _DPRINTK_FLAGS_DEFAULT }; \ + JUMP_LABEL(&descriptor.enabled, do_printk); \ + goto out; \ +do_printk: \ + dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ +out: ; \ } while (0) #else @@ -80,7 +83,7 @@ static inline int ddebug_remove_module(const char *mod) #define dynamic_pr_debug(fmt, ...) \ do { if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0) -#define dynamic_dev_dbg(dev, format, ...) \ +#define dynamic_dev_dbg(dev, fmt, ...) 
\ do { if (0) dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); } while (0) #endif diff --git a/include/linux/early_res.h b/include/linux/early_res.h deleted file mode 100644 index 29c09f57a13c..000000000000 --- a/include/linux/early_res.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef _LINUX_EARLY_RES_H -#define _LINUX_EARLY_RES_H -#ifdef __KERNEL__ - -extern void reserve_early(u64 start, u64 end, char *name); -extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); -extern void free_early(u64 start, u64 end); -void free_early_partial(u64 start, u64 end); -extern void early_res_to_bootmem(u64 start, u64 end); - -void reserve_early_without_check(u64 start, u64 end, char *name); -u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, - u64 size, u64 align); -u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start, - u64 *sizep, u64 align); -u64 find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align); -u64 get_max_mapped(void); -#include <linux/range.h> -int get_free_all_memory_range(struct range **rangep, int nodeid); - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_EARLY_RES_H */ diff --git a/include/linux/edac.h b/include/linux/edac.h index 7cf92e8a4196..36c66443bdfd 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -13,6 +13,7 @@ #define _LINUX_EDAC_H_ #include <asm/atomic.h> +#include <linux/sysdev.h> #define EDAC_OPSTATE_INVAL -1 #define EDAC_OPSTATE_POLL 0 @@ -22,9 +23,12 @@ extern int edac_op_state; extern int edac_err_assert; extern atomic_t edac_handlers; +extern struct sysdev_class edac_class; extern int edac_handler_set(void); extern void edac_atomic_assert_error(void); +extern struct sysdev_class *edac_get_sysfs_class(void); +extern void edac_put_sysfs_class(void); static inline void opstate_init(void) { diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 926b50322a46..80a0ece8f7e4 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -93,6 +93,7 @@ struct elevator_queue struct 
elevator_type *elevator_type; struct mutex sysfs_lock; struct hlist_head *hash; + unsigned int registered:1; }; /* @@ -121,6 +122,8 @@ extern void elv_completed_request(struct request_queue *, struct request *); extern int elv_set_request(struct request_queue *, struct request *, gfp_t); extern void elv_put_request(struct request_queue *, struct request *); extern void elv_drain_elevator(struct request_queue *); +extern void elv_quiesce_start(struct request_queue *); +extern void elv_quiesce_end(struct request_queue *); /* * io scheduler registration diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 2308fbb4523a..f16a01081e15 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -71,7 +71,7 @@ static inline int is_zero_ether_addr(const u8 *addr) */ static inline int is_multicast_ether_addr(const u8 *addr) { - return (0x01 & addr[0]); + return 0x01 & addr[0]; } /** @@ -82,7 +82,7 @@ static inline int is_multicast_ether_addr(const u8 *addr) */ static inline int is_local_ether_addr(const u8 *addr) { - return (0x02 & addr[0]); + return 0x02 & addr[0]; } /** @@ -237,13 +237,29 @@ static inline bool is_etherdev_addr(const struct net_device *dev, * entry points. */ -static inline int compare_ether_header(const void *a, const void *b) +static inline unsigned long compare_ether_header(const void *a, const void *b) { +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + unsigned long fold; + + /* + * We want to compare 14 bytes: + * [a0 ... a13] ^ [b0 ... b13] + * Use two long XOR, ORed together, with an overlap of two bytes. + * [a0 a1 a2 a3 a4 a5 a6 a7 ] ^ [b0 b1 b2 b3 b4 b5 b6 b7 ] | + * [a6 a7 a8 a9 a10 a11 a12 a13] ^ [b6 b7 b8 b9 b10 b11 b12 b13] + * This means the [a6 a7] ^ [b6 b7] part is done two times. 
+ */ + fold = *(unsigned long *)a ^ *(unsigned long *)b; + fold |= *(unsigned long *)(a + 6) ^ *(unsigned long *)(b + 6); + return fold; +#else u32 *a32 = (u32 *)((u8 *)a + 2); u32 *b32 = (u32 *)((u8 *)b + 2); return (*(u16 *)a ^ *(u16 *)b) | (a32[0] ^ b32[0]) | (a32[1] ^ b32[1]) | (a32[2] ^ b32[2]); +#endif } #endif /* _LINUX_ETHERDEVICE_H */ diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 991269e5b152..6628a507fd3b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -14,6 +14,7 @@ #define _LINUX_ETHTOOL_H #include <linux/types.h> +#include <linux/if_ether.h> /* This should work for both 32 and 64 bit userland. */ struct ethtool_cmd { @@ -308,15 +309,28 @@ struct ethtool_perm_addr { * flag differs from the read-only value. */ enum ethtool_flags { + ETH_FLAG_TXVLAN = (1 << 7), /* TX VLAN offload enabled */ + ETH_FLAG_RXVLAN = (1 << 8), /* RX VLAN offload enabled */ ETH_FLAG_LRO = (1 << 15), /* LRO is enabled */ ETH_FLAG_NTUPLE = (1 << 27), /* N-tuple filters enabled */ ETH_FLAG_RXHASH = (1 << 28), }; /* The following structures are for supporting RX network flow - * classification configuration. Note, all multibyte fields, e.g., - * ip4src, ip4dst, psrc, pdst, spi, etc. are expected to be in network - * byte order. + * classification and RX n-tuple configuration. Note, all multibyte + * fields, e.g., ip4src, ip4dst, psrc, pdst, spi, etc. are expected to + * be in network byte order. + */ + +/** + * struct ethtool_tcpip4_spec - flow specification for TCP/IPv4 etc. + * @ip4src: Source host + * @ip4dst: Destination host + * @psrc: Source port + * @pdst: Destination port + * @tos: Type-of-service + * + * This can be used to specify a TCP/IPv4, UDP/IPv4 or SCTP/IPv4 flow. 
*/ struct ethtool_tcpip4_spec { __be32 ip4src; @@ -326,6 +340,15 @@ struct ethtool_tcpip4_spec { __u8 tos; }; +/** + * struct ethtool_ah_espip4_spec - flow specification for IPsec/IPv4 + * @ip4src: Source host + * @ip4dst: Destination host + * @spi: Security parameters index + * @tos: Type-of-service + * + * This can be used to specify an IPsec transport or tunnel over IPv4. + */ struct ethtool_ah_espip4_spec { __be32 ip4src; __be32 ip4dst; @@ -333,21 +356,17 @@ struct ethtool_ah_espip4_spec { __u8 tos; }; -struct ethtool_rawip4_spec { - __be32 ip4src; - __be32 ip4dst; - __u8 hdata[64]; -}; - -struct ethtool_ether_spec { - __be16 ether_type; - __u8 frame_size; - __u8 eframe[16]; -}; - #define ETH_RX_NFC_IP4 1 -#define ETH_RX_NFC_IP6 2 +/** + * struct ethtool_usrip4_spec - general flow specification for IPv4 + * @ip4src: Source host + * @ip4dst: Destination host + * @l4_4_bytes: First 4 bytes of transport (layer 4) header + * @tos: Type-of-service + * @ip_ver: Value must be %ETH_RX_NFC_IP4; mask must be 0 + * @proto: Transport protocol number; mask must be 0 + */ struct ethtool_usrip4_spec { __be32 ip4src; __be32 ip4dst; @@ -357,6 +376,15 @@ struct ethtool_usrip4_spec { __u8 proto; }; +/** + * struct ethtool_rx_flow_spec - specification for RX flow filter + * @flow_type: Type of match to perform, e.g. 
%TCP_V4_FLOW + * @h_u: Flow fields to match (dependent on @flow_type) + * @m_u: Masks for flow field bits to be ignored + * @ring_cookie: RX ring/queue index to deliver to, or %RX_CLS_FLOW_DISC + * if packets should be discarded + * @location: Index of filter in hardware table + */ struct ethtool_rx_flow_spec { __u32 flow_type; union { @@ -365,36 +393,91 @@ struct ethtool_rx_flow_spec { struct ethtool_tcpip4_spec sctp_ip4_spec; struct ethtool_ah_espip4_spec ah_ip4_spec; struct ethtool_ah_espip4_spec esp_ip4_spec; - struct ethtool_rawip4_spec raw_ip4_spec; - struct ethtool_ether_spec ether_spec; struct ethtool_usrip4_spec usr_ip4_spec; - __u8 hdata[64]; - } h_u, m_u; /* entry, mask */ + struct ethhdr ether_spec; + __u8 hdata[72]; + } h_u, m_u; __u64 ring_cookie; __u32 location; }; +/** + * struct ethtool_rxnfc - command to get or set RX flow classification rules + * @cmd: Specific command number - %ETHTOOL_GRXFH, %ETHTOOL_SRXFH, + * %ETHTOOL_GRXRINGS, %ETHTOOL_GRXCLSRLCNT, %ETHTOOL_GRXCLSRULE, + * %ETHTOOL_GRXCLSRLALL, %ETHTOOL_SRXCLSRLDEL or %ETHTOOL_SRXCLSRLINS + * @flow_type: Type of flow to be affected, e.g. %TCP_V4_FLOW + * @data: Command-dependent value + * @fs: Flow filter specification + * @rule_cnt: Number of rules to be affected + * @rule_locs: Array of valid rule indices + * + * For %ETHTOOL_GRXFH and %ETHTOOL_SRXFH, @data is a bitmask indicating + * the fields included in the flow hash, e.g. %RXH_IP_SRC. The following + * structure fields must not be used. + * + * For %ETHTOOL_GRXRINGS, @data is set to the number of RX rings/queues + * on return. + * + * For %ETHTOOL_GRXCLSRLCNT, @rule_cnt is set to the number of defined + * rules on return. + * + * For %ETHTOOL_GRXCLSRULE, @fs.@location specifies the index of an + * existing filter rule on entry and @fs contains the rule on return. + * + * For %ETHTOOL_GRXCLSRLALL, @rule_cnt specifies the array size of the + * user buffer for @rule_locs on entry. 
On return, @data is the size + * of the filter table and @rule_locs contains the indices of the + * defined rules. + * + * For %ETHTOOL_SRXCLSRLINS, @fs specifies the filter rule to add or + * update. @fs.@location specifies the index to use and must not be + * ignored. + * + * For %ETHTOOL_SRXCLSRLDEL, @fs.@location specifies the index of an + * existing filter rule on entry. + * + * Implementation of indexed classification rules generally requires a + * TCAM. + */ struct ethtool_rxnfc { __u32 cmd; __u32 flow_type; - /* The rx flow hash value or the rule DB size */ __u64 data; - /* The following fields are not valid and must not be used for - * the ETHTOOL_{G,X}RXFH commands. */ struct ethtool_rx_flow_spec fs; __u32 rule_cnt; __u32 rule_locs[0]; }; +/** + * struct ethtool_rxfh_indir - command to get or set RX flow hash indirection + * @cmd: Specific command number - %ETHTOOL_GRXFHINDIR or %ETHTOOL_SRXFHINDIR + * @size: On entry, the array size of the user buffer. On return from + * %ETHTOOL_GRXFHINDIR, the array size of the hardware indirection table. + * @ring_index: RX ring/queue index for each hash value + */ struct ethtool_rxfh_indir { __u32 cmd; - /* On entry, this is the array size of the user buffer. On - * return from ETHTOOL_GRXFHINDIR, this is the array size of - * the hardware indirection table. */ __u32 size; - __u32 ring_index[0]; /* ring/queue index for each hash value */ + __u32 ring_index[0]; }; +/** + * struct ethtool_rx_ntuple_flow_spec - specification for RX flow filter + * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW + * @h_u: Flow field values to match (dependent on @flow_type) + * @m_u: Masks for flow field value bits to be ignored + * @vlan_tag: VLAN tag to match + * @vlan_tag_mask: Mask for VLAN tag bits to be ignored + * @data: Driver-dependent data to match + * @data_mask: Mask for driver-dependent data bits to be ignored + * @action: RX ring/queue index to deliver to (non-negative) or other action + * (negative, e.g. 
%ETHTOOL_RXNTUPLE_ACTION_DROP) + * + * For flow types %TCP_V4_FLOW, %UDP_V4_FLOW and %SCTP_V4_FLOW, where + * a field value and mask are both zero this is treated as if all mask + * bits are set i.e. the field is ignored. + */ struct ethtool_rx_ntuple_flow_spec { __u32 flow_type; union { @@ -403,22 +486,26 @@ struct ethtool_rx_ntuple_flow_spec { struct ethtool_tcpip4_spec sctp_ip4_spec; struct ethtool_ah_espip4_spec ah_ip4_spec; struct ethtool_ah_espip4_spec esp_ip4_spec; - struct ethtool_rawip4_spec raw_ip4_spec; - struct ethtool_ether_spec ether_spec; struct ethtool_usrip4_spec usr_ip4_spec; - __u8 hdata[64]; - } h_u, m_u; /* entry, mask */ + struct ethhdr ether_spec; + __u8 hdata[72]; + } h_u, m_u; __u16 vlan_tag; __u16 vlan_tag_mask; - __u64 data; /* user-defined flow spec data */ - __u64 data_mask; /* user-defined flow spec mask */ + __u64 data; + __u64 data_mask; - /* signed to distinguish between queue and actions (DROP) */ __s32 action; -#define ETHTOOL_RXNTUPLE_ACTION_DROP -1 +#define ETHTOOL_RXNTUPLE_ACTION_DROP (-1) /* drop packet */ +#define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2) /* clear filter */ }; +/** + * struct ethtool_rx_ntuple - command to set or clear RX flow filter + * @cmd: Command number - %ETHTOOL_SRXNTUPLE + * @fs: Flow filter specification + */ struct ethtool_rx_ntuple { __u32 cmd; struct ethtool_rx_ntuple_flow_spec fs; @@ -759,22 +846,23 @@ struct ethtool_ops { #define WAKE_MAGIC (1 << 5) #define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */ -/* L3-L4 network traffic flow types */ -#define TCP_V4_FLOW 0x01 -#define UDP_V4_FLOW 0x02 -#define SCTP_V4_FLOW 0x03 -#define AH_ESP_V4_FLOW 0x04 -#define TCP_V6_FLOW 0x05 -#define UDP_V6_FLOW 0x06 -#define SCTP_V6_FLOW 0x07 -#define AH_ESP_V6_FLOW 0x08 -#define AH_V4_FLOW 0x09 -#define ESP_V4_FLOW 0x0a -#define AH_V6_FLOW 0x0b -#define ESP_V6_FLOW 0x0c -#define IP_USER_FLOW 0x0d -#define IPV4_FLOW 0x10 -#define IPV6_FLOW 0x11 +/* L2-L4 network traffic flow types */ +#define TCP_V4_FLOW 
0x01 /* hash or spec (tcp_ip4_spec) */ +#define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */ +#define SCTP_V4_FLOW 0x03 /* hash or spec (sctp_ip4_spec) */ +#define AH_ESP_V4_FLOW 0x04 /* hash only */ +#define TCP_V6_FLOW 0x05 /* hash only */ +#define UDP_V6_FLOW 0x06 /* hash only */ +#define SCTP_V6_FLOW 0x07 /* hash only */ +#define AH_ESP_V6_FLOW 0x08 /* hash only */ +#define AH_V4_FLOW 0x09 /* hash or spec (ah_ip4_spec) */ +#define ESP_V4_FLOW 0x0a /* hash or spec (esp_ip4_spec) */ +#define AH_V6_FLOW 0x0b /* hash only */ +#define ESP_V6_FLOW 0x0c /* hash only */ +#define IP_USER_FLOW 0x0d /* spec only (usr_ip4_spec) */ +#define IPV4_FLOW 0x10 /* hash only */ +#define IPV6_FLOW 0x11 /* hash only */ +#define ETHER_FLOW 0x12 /* spec only (ether_spec) */ /* L3-L4 network traffic flow hash options */ #define RXH_L2DA (1 << 1) diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index a9cd507f8cd2..28028988c862 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -67,6 +67,19 @@ enum fid_type { * 32 bit parent block number, 32 bit parent generation number */ FILEID_UDF_WITH_PARENT = 0x52, + + /* + * 64 bit checkpoint number, 64 bit inode number, + * 32 bit generation number. + */ + FILEID_NILFS_WITHOUT_PARENT = 0x61, + + /* + * 64 bit checkpoint number, 64 bit inode number, + * 32 bit generation number, 32 bit parent generation. + * 64 bit parent inode number. 
+ */ + FILEID_NILFS_WITH_PARENT = 0x62, }; struct fid { diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index f59ed297b661..133c0ba25e30 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -31,7 +31,7 @@ struct embedded_fd_set { struct fdtable { unsigned int max_fds; - struct file ** fd; /* current fd array */ + struct file __rcu **fd; /* current fd array */ fd_set *close_on_exec; fd_set *open_fds; struct rcu_head rcu; @@ -46,7 +46,7 @@ struct files_struct { * read mostly part */ atomic_t count; - struct fdtable *fdt; + struct fdtable __rcu *fdt; struct fdtable fdtab; /* * written part on a separate cache line in SMP @@ -55,7 +55,7 @@ struct files_struct { int next_fd; struct embedded_fd_set close_on_exec_init; struct embedded_fd_set open_fds_init; - struct file * fd_array[NR_OPEN_DEFAULT]; + struct file __rcu * fd_array[NR_OPEN_DEFAULT]; }; #define rcu_dereference_check_fdtable(files, fdtfd) \ diff --git a/include/linux/fs.h b/include/linux/fs.h index 63d069bd80b7..4f34ff6e5558 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -135,12 +135,12 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. - * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all - * previously submitted writes must be safely on storage - * before this one is started. Also guarantees that when - * this write is complete, it itself is also safely on - * storage. Prevents reordering of writes on both sides - * of this IO. + * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. + * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on + * non-volatile media on completion. + * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded + * by a cache flush and data is guaranteed to be on + * non-volatile media on completion. 
* */ #define RW_MASK REQ_WRITE @@ -156,16 +156,12 @@ struct inodes_stat_t { #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) #define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC) #define WRITE_META (WRITE | REQ_META) -#define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ - REQ_HARDBARRIER) - -/* - * These aren't really reads or writes, they pass down information about - * parts of device that are now unused by the file system. - */ -#define DISCARD_NOBARRIER (WRITE | REQ_DISCARD) -#define DISCARD_BARRIER (WRITE | REQ_DISCARD | REQ_HARDBARRIER) -#define DISCARD_SECURE (DISCARD_NOBARRIER | REQ_SECURE) +#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FLUSH) +#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FUA) +#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FLUSH | REQ_FUA) #define SEL_IN 1 #define SEL_OUT 2 @@ -1093,10 +1089,6 @@ struct file_lock { #include <linux/fcntl.h> -/* temporary stubs for BKL removal */ -#define lock_flocks() lock_kernel() -#define unlock_flocks() unlock_kernel() - extern void send_sigio(struct fown_struct *fown, int fd, int band); #ifdef CONFIG_FILE_LOCKING @@ -1135,6 +1127,8 @@ extern int vfs_setlease(struct file *, long, struct file_lock **); extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); +extern void lock_flocks(void); +extern void unlock_flocks(void); #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, struct flock __user *user) { @@ -1277,6 +1271,14 @@ static inline int lock_may_write(struct inode *inode, loff_t start, return 1; } +static inline void lock_flocks(void) +{ +} + +static inline void unlock_flocks(void) +{ +} + #endif /* !CONFIG_FILE_LOCKING */ @@ -1384,7 +1386,7 @@ struct super_block { * Saved mount options for lazy filesystems using * 
generic_show_options() */ - char *s_options; + char __rcu *s_options; }; extern struct timespec current_fs_time(struct super_block *sb); @@ -2378,6 +2380,8 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, extern int generic_file_fsync(struct file *, int); +extern int generic_check_addressable(unsigned, u64); + #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, struct page *, struct page *); @@ -2454,6 +2458,7 @@ static const struct file_operations __fops = { \ .release = simple_attr_release, \ .read = simple_attr_read, \ .write = simple_attr_write, \ + .llseek = generic_file_llseek, \ }; static inline void __attribute__((format(printf, 1, 2))) diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 28e33fea5107..4eb56ed75fbc 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -58,17 +58,35 @@ enum fsl_usb2_phy_modes { FSL_USB2_PHY_SERIAL, }; +struct clk; +struct platform_device; + struct fsl_usb2_platform_data { /* board specific information */ enum fsl_usb2_operating_modes operating_mode; enum fsl_usb2_phy_modes phy_mode; unsigned int port_enables; + unsigned int workaround; + + int (*init)(struct platform_device *); + void (*exit)(struct platform_device *); + void __iomem *regs; /* ioremap'd register base */ + struct clk *clk; + unsigned big_endian_mmio:1; + unsigned big_endian_desc:1; + unsigned es:1; /* need USBMODE:ES */ + unsigned le_setup_buf:1; + unsigned have_sysif_regs:1; + unsigned invert_drvvbus:1; + unsigned invert_pwr_fault:1; }; /* Flags in fsl_usb2_mph_platform_data */ #define FSL_USB2_PORT0_ENABLED 0x00000001 #define FSL_USB2_PORT1_ENABLED 0x00000002 +#define FLS_USB2_WORKAROUND_ENGCM09152 (1 << 0) + struct spi_device; struct fsl_spi_platform_data { diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 02b8b24f8f51..8beabb958f61 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -191,8 
+191,8 @@ struct ftrace_event_call { unsigned int flags; #ifdef CONFIG_PERF_EVENTS - int perf_refcount; - struct hlist_head *perf_events; + int perf_refcount; + struct hlist_head __percpu *perf_events; #endif }; @@ -252,8 +252,8 @@ DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); -extern int perf_trace_enable(struct perf_event *event); -extern void perf_trace_disable(struct perf_event *event); +extern int perf_trace_add(struct perf_event *event, int flags); +extern void perf_trace_del(struct perf_event *event, int flags); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 5f2f4c4d8fb0..557c3927e70f 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -12,6 +12,7 @@ #include <linux/types.h> #include <linux/kdev_t.h> #include <linux/rcupdate.h> +#include <linux/slab.h> #ifdef CONFIG_BLOCK @@ -86,7 +87,15 @@ struct disk_stats { unsigned long io_ticks; unsigned long time_in_queue; }; - + +#define PARTITION_META_INFO_VOLNAMELTH 64 +#define PARTITION_META_INFO_UUIDLTH 16 + +struct partition_meta_info { + u8 uuid[PARTITION_META_INFO_UUIDLTH]; /* always big endian */ + u8 volname[PARTITION_META_INFO_VOLNAMELTH]; +}; + struct hd_struct { sector_t start_sect; sector_t nr_sects; @@ -95,6 +104,7 @@ struct hd_struct { struct device __dev; struct kobject *holder_dir; int policy, partno; + struct partition_meta_info *info; #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; #endif @@ -129,8 +139,9 @@ struct blk_scsi_cmd_filter { struct disk_part_tbl { struct rcu_head rcu_head; int len; - struct hd_struct *last_lookup; - struct hd_struct *part[]; + struct hd_struct __rcu *last_lookup; + struct gendisk *disk; + struct hd_struct __rcu *part[]; }; struct gendisk { @@ -149,7 +160,7 @@ 
struct gendisk { * non-critical accesses use RCU. Always access through * helpers. */ - struct disk_part_tbl *part_tbl; + struct disk_part_tbl __rcu *part_tbl; struct hd_struct part0; const struct block_device_operations *fops; @@ -181,6 +192,30 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part) return NULL; } +static inline void part_pack_uuid(const u8 *uuid_str, u8 *to) +{ + int i; + for (i = 0; i < 16; ++i) { + *to++ = (hex_to_bin(*uuid_str) << 4) | + (hex_to_bin(*(uuid_str + 1))); + uuid_str += 2; + switch (i) { + case 3: + case 5: + case 7: + case 9: + uuid_str++; + continue; + } + } +} + +static inline char *part_unpack_uuid(const u8 *uuid, char *out) +{ + sprintf(out, "%pU", uuid); + return out; +} + static inline int disk_max_parts(struct gendisk *disk) { if (disk->flags & GENHD_FL_EXT_DEVT) @@ -342,6 +377,19 @@ static inline int part_in_flight(struct hd_struct *part) return part->in_flight[0] + part->in_flight[1]; } +static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk) +{ + if (disk) + return kzalloc_node(sizeof(struct partition_meta_info), + GFP_KERNEL, disk->node_id); + return kzalloc(sizeof(struct partition_meta_info), GFP_KERNEL); +} + +static inline void free_part_info(struct hd_struct *part) +{ + kfree(part->info); +} + /* block/blk-core.c */ extern void part_round_stats(int cpu, struct hd_struct *part); @@ -533,7 +581,9 @@ extern int disk_expand_part_tbl(struct gendisk *disk, int target); extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); extern struct hd_struct * __must_check add_partition(struct gendisk *disk, int partno, sector_t start, - sector_t len, int flags); + sector_t len, int flags, + struct partition_meta_info + *info); extern void delete_partition(struct gendisk *, int); extern void printk_all_partitions(void); diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index d5b387669dab..8a389b608ce3 100644 --- a/include/linux/hardirq.h +++ 
b/include/linux/hardirq.h @@ -8,7 +8,6 @@ #include <linux/lockdep.h> #include <linux/ftrace_irq.h> #include <asm/hardirq.h> -#include <asm/system.h> /* * We put the hardirq and softirq counter into the preemption @@ -64,6 +63,8 @@ #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) #define NMI_OFFSET (1UL << NMI_SHIFT) +#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) + #ifndef PREEMPT_ACTIVE #define PREEMPT_ACTIVE_BITS 1 #define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) @@ -82,10 +83,13 @@ /* * Are we doing bottom half or hardware interrupt processing? * Are we in a softirq context? Interrupt context? + * in_softirq - Are we currently processing softirq or have bh disabled? + * in_serving_softirq - Are we currently processing softirq? */ #define in_irq() (hardirq_count()) #define in_softirq() (softirq_count()) #define in_interrupt() (irq_count()) +#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) /* * Are we in NMI context? @@ -132,14 +136,16 @@ extern void synchronize_irq(unsigned int irq); struct task_struct; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING) static inline void account_system_vtime(struct task_struct *tsk) { } +#else +extern void account_system_vtime(struct task_struct *tsk); #endif #if defined(CONFIG_NO_HZ) -#if defined(CONFIG_TINY_RCU) +#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) extern void rcu_enter_nohz(void); extern void rcu_exit_nohz(void); diff --git a/include/linux/htirq.h b/include/linux/htirq.h index c96ea46737d0..70a1dbbf2093 100644 --- a/include/linux/htirq.h +++ b/include/linux/htirq.h @@ -9,8 +9,9 @@ struct ht_irq_msg { /* Helper functions.. 
*/ void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); -void mask_ht_irq(unsigned int irq); -void unmask_ht_irq(unsigned int irq); +struct irq_data; +void mask_ht_irq(struct irq_data *data); +void unmask_ht_irq(struct irq_data *data); /* The arch hook for getting things started */ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev); diff --git a/include/linux/idr.h b/include/linux/idr.h index e968db71e33a..cdb715e58e3e 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -50,14 +50,14 @@ struct idr_layer { unsigned long bitmap; /* A zero bit means "space here" */ - struct idr_layer *ary[1<<IDR_BITS]; + struct idr_layer __rcu *ary[1<<IDR_BITS]; int count; /* When zero, we can release it */ int layer; /* distance from leaf */ struct rcu_head rcu_head; }; struct idr { - struct idr_layer *top; + struct idr_layer __rcu *top; struct idr_layer *id_free; int layers; /* only valid without concurrent changes */ int id_free_cnt; diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 97b2eae6a22c..ed5a03cbe184 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -986,6 +986,7 @@ struct ieee80211_ht_info { #define WLAN_AUTH_OPEN 0 #define WLAN_AUTH_SHARED_KEY 1 #define WLAN_AUTH_FT 2 +#define WLAN_AUTH_SAE 3 #define WLAN_AUTH_LEAP 128 #define WLAN_AUTH_CHALLENGE_LEN 128 @@ -1072,6 +1073,10 @@ enum ieee80211_statuscode { WLAN_STATUS_NO_DIRECT_LINK = 48, WLAN_STATUS_STA_NOT_PRESENT = 49, WLAN_STATUS_STA_NOT_QSTA = 50, + /* 802.11s */ + WLAN_STATUS_ANTI_CLOG_REQUIRED = 76, + WLAN_STATUS_FCG_NOT_SUPP = 78, + WLAN_STATUS_STA_NO_TBTT = 78, }; @@ -1112,6 +1117,22 @@ enum ieee80211_reasoncode { WLAN_REASON_QSTA_REQUIRE_SETUP = 38, WLAN_REASON_QSTA_TIMEOUT = 39, WLAN_REASON_QSTA_CIPHER_NOT_SUPP = 45, + /* 802.11s */ + WLAN_REASON_MESH_PEER_CANCELED = 52, + WLAN_REASON_MESH_MAX_PEERS = 53, + WLAN_REASON_MESH_CONFIG = 54, + WLAN_REASON_MESH_CLOSE = 55, + 
WLAN_REASON_MESH_MAX_RETRIES = 56, + WLAN_REASON_MESH_CONFIRM_TIMEOUT = 57, + WLAN_REASON_MESH_INVALID_GTK = 58, + WLAN_REASON_MESH_INCONSISTENT_PARAM = 59, + WLAN_REASON_MESH_INVALID_SECURITY = 60, + WLAN_REASON_MESH_PATH_ERROR = 61, + WLAN_REASON_MESH_PATH_NOFORWARD = 62, + WLAN_REASON_MESH_PATH_DEST_UNREACHABLE = 63, + WLAN_REASON_MAC_EXISTS_IN_MBSS = 64, + WLAN_REASON_MESH_CHAN_REGULATORY = 65, + WLAN_REASON_MESH_CHAN = 66, }; @@ -1139,20 +1160,33 @@ enum ieee80211_eid { WLAN_EID_TS_DELAY = 43, WLAN_EID_TCLAS_PROCESSING = 44, WLAN_EID_QOS_CAPA = 46, - /* 802.11s - * - * All mesh EID numbers are pending IEEE 802.11 ANA approval. - * The numbers have been incremented from those suggested in - * 802.11s/D2.0 so that MESH_CONFIG does not conflict with - * EXT_SUPP_RATES. + /* 802.11s */ + WLAN_EID_MESH_CONFIG = 113, + WLAN_EID_MESH_ID = 114, + WLAN_EID_LINK_METRIC_REPORT = 115, + WLAN_EID_CONGESTION_NOTIFICATION = 116, + /* Note that the Peer Link IE has been replaced with the similar + * Peer Management IE. We will keep the former definition until mesh + * code is changed to comply with latest 802.11s drafts. 
*/ - WLAN_EID_MESH_CONFIG = 51, - WLAN_EID_MESH_ID = 52, - WLAN_EID_PEER_LINK = 55, - WLAN_EID_PREQ = 68, - WLAN_EID_PREP = 69, - WLAN_EID_PERR = 70, - WLAN_EID_RANN = 49, /* compatible with FreeBSD */ + WLAN_EID_PEER_LINK = 55, /* no longer in 802.11s drafts */ + WLAN_EID_PEER_MGMT = 117, + WLAN_EID_CHAN_SWITCH_PARAM = 118, + WLAN_EID_MESH_AWAKE_WINDOW = 119, + WLAN_EID_BEACON_TIMING = 120, + WLAN_EID_MCCAOP_SETUP_REQ = 121, + WLAN_EID_MCCAOP_SETUP_RESP = 122, + WLAN_EID_MCCAOP_ADVERT = 123, + WLAN_EID_MCCAOP_TEARDOWN = 124, + WLAN_EID_GANN = 125, + WLAN_EID_RANN = 126, + WLAN_EID_PREQ = 130, + WLAN_EID_PREP = 131, + WLAN_EID_PERR = 132, + WLAN_EID_PXU = 137, + WLAN_EID_PXUC = 138, + WLAN_EID_AUTH_MESH_PEER_EXCH = 139, + WLAN_EID_MIC = 140, WLAN_EID_PWR_CONSTRAINT = 32, WLAN_EID_PWR_CAPABILITY = 33, @@ -1211,9 +1245,14 @@ enum ieee80211_category { WLAN_CATEGORY_HT = 7, WLAN_CATEGORY_SA_QUERY = 8, WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION = 9, + WLAN_CATEGORY_MESH_ACTION = 13, + WLAN_CATEGORY_MULTIHOP_ACTION = 14, + WLAN_CATEGORY_SELF_PROTECTED = 15, WLAN_CATEGORY_WMM = 17, - WLAN_CATEGORY_MESH_PLINK = 30, /* Pending ANA approval */ - WLAN_CATEGORY_MESH_PATH_SEL = 32, /* Pending ANA approval */ + /* TODO: remove MESH_PLINK and MESH_PATH_SEL after */ + /* mesh is updated to current 802.11s draft */ + WLAN_CATEGORY_MESH_PLINK = 30, + WLAN_CATEGORY_MESH_PATH_SEL = 32, WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126, WLAN_CATEGORY_VENDOR_SPECIFIC = 127, }; @@ -1351,6 +1390,8 @@ enum ieee80211_sa_query_action { /* AKM suite selectors */ #define WLAN_AKM_SUITE_8021X 0x000FAC01 #define WLAN_AKM_SUITE_PSK 0x000FAC02 +#define WLAN_AKM_SUITE_SAE 0x000FAC08 +#define WLAN_AKM_SUITE_FT_OVER_SAE 0x000FAC09 #define WLAN_MAX_KEY_LEN 32 diff --git a/include/linux/if.h b/include/linux/if.h index 53558ec59e1b..123959927745 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -75,6 +75,8 @@ #define IFF_DISABLE_NETPOLL 0x2000 /* disable netpoll at run-time */ #define IFF_MACVLAN_PORT 
0x4000 /* device used as macvlan port */ #define IFF_BRIDGE_PORT 0x8000 /* device used as bridge port */ +#define IFF_OVS_DATAPATH 0x10000 /* device used as Open vSwitch + * datapath port */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h index 2c7994372bde..a17edda8a781 100644 --- a/include/linux/if_bonding.h +++ b/include/linux/if_bonding.h @@ -84,6 +84,9 @@ #define BOND_DEFAULT_MAX_BONDS 1 /* Default maximum number of devices to support */ #define BOND_DEFAULT_TX_QUEUES 16 /* Default number of tx queues per device */ + +#define BOND_DEFAULT_RESEND_IGMP 1 /* Default number of IGMP membership reports */ + /* hashing types */ #define BOND_XMIT_POLICY_LAYER2 0 /* layer 2 (MAC only), default */ #define BOND_XMIT_POLICY_LAYER34 1 /* layer 3+4 (IP ^ (TCP || UDP)) */ diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index bed7a4682b90..f9c3df03db0f 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -137,8 +137,6 @@ extern struct ctl_table ether_table[]; extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len); -#define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x" - #endif #endif /* _LINUX_IF_ETHER_H */ diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 35280b302290..8a2fd66a8b5f 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -40,6 +40,12 @@ struct macvlan_rx_stats { unsigned long rx_errors; }; +/* + * Maximum times a macvtap device can be opened. This can be used to + * configure the number of receive queue, e.g. for multiqueue virtio. + */ +#define MAX_MACVTAP_QUEUES (NR_CPUS < 16 ? 
NR_CPUS : 16) + struct macvlan_dev { struct net_device *dev; struct list_head list; @@ -50,7 +56,8 @@ struct macvlan_dev { enum macvlan_mode mode; int (*receive)(struct sk_buff *skb); int (*forward)(struct net_device *dev, struct sk_buff *skb); - struct macvtap_queue *tap; + struct macvtap_queue *taps[MAX_MACVTAP_QUEUES]; + int numvtaps; }; static inline void macvlan_count_rx(const struct macvlan_dev *vlan, diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 27741e05446f..397921b09ef9 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -40,25 +40,35 @@ * PPPoE addressing definition */ typedef __be16 sid_t; -struct pppoe_addr{ - sid_t sid; /* Session identifier */ - unsigned char remote[ETH_ALEN]; /* Remote address */ - char dev[IFNAMSIZ]; /* Local device to use */ +struct pppoe_addr { + sid_t sid; /* Session identifier */ + unsigned char remote[ETH_ALEN]; /* Remote address */ + char dev[IFNAMSIZ]; /* Local device to use */ }; /************************************************************************ - * Protocols supported by AF_PPPOX - */ + * PPTP addressing definition + */ +struct pptp_addr { + __be16 call_id; + struct in_addr sin_addr; +}; + +/************************************************************************ + * Protocols supported by AF_PPPOX + */ #define PX_PROTO_OE 0 /* Currently just PPPoE */ #define PX_PROTO_OL2TP 1 /* Now L2TP also */ -#define PX_MAX_PROTO 2 - -struct sockaddr_pppox { - sa_family_t sa_family; /* address family, AF_PPPOX */ - unsigned int sa_protocol; /* protocol identifier */ - union{ - struct pppoe_addr pppoe; - }sa_addr; +#define PX_PROTO_PPTP 2 +#define PX_MAX_PROTO 3 + +struct sockaddr_pppox { + sa_family_t sa_family; /* address family, AF_PPPOX */ + unsigned int sa_protocol; /* protocol identifier */ + union { + struct pppoe_addr pppoe; + struct pptp_addr pptp; + } sa_addr; } __attribute__((packed)); /* The use of the above union isn't viable because the size of this @@ -150,15 +160,23 @@ 
struct pppoe_opt { relayed to (PPPoE relaying) */ }; +struct pptp_opt { + struct pptp_addr src_addr; + struct pptp_addr dst_addr; + u32 ack_sent, ack_recv; + u32 seq_sent, seq_recv; + int ppp_flags; +}; #include <net/sock.h> struct pppox_sock { /* struct sock must be the first member of pppox_sock */ - struct sock sk; - struct ppp_channel chan; + struct sock sk; + struct ppp_channel chan; struct pppox_sock *next; /* for hash table */ union { struct pppoe_opt pppoe; + struct pptp_opt pptp; } proto; __be16 num; }; @@ -186,7 +204,7 @@ struct pppox_proto { struct module *owner; }; -extern int register_pppox_proto(int proto_num, struct pppox_proto *pp); +extern int register_pppox_proto(int proto_num, const struct pppox_proto *pp); extern void unregister_pppox_proto(int proto_num); extern void pppox_unbind_sock(struct sock *sk);/* delete ppp-channel binding */ extern int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 3d870fda8c4f..c2f3a72712ce 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -16,6 +16,7 @@ #ifdef __KERNEL__ #include <linux/netdevice.h> #include <linux/etherdevice.h> +#include <linux/rtnetlink.h> #define VLAN_HLEN 4 /* The additional bytes (on top of the Ethernet header) * that VLAN requires. @@ -68,6 +69,7 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) #define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */ #define VLAN_TAG_PRESENT VLAN_CFI_MASK #define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ +#define VLAN_N_VID 4096 /* found in socket.c */ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); @@ -76,9 +78,8 @@ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); * depends on completely exhausting the VLAN identifier space. Thus * it gives constant time look-up, but in many cases it wastes memory. 
*/ -#define VLAN_GROUP_ARRAY_LEN 4096 #define VLAN_GROUP_ARRAY_SPLIT_PARTS 8 -#define VLAN_GROUP_ARRAY_PART_LEN (VLAN_GROUP_ARRAY_LEN/VLAN_GROUP_ARRAY_SPLIT_PARTS) +#define VLAN_GROUP_ARRAY_PART_LEN (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS) struct vlan_group { struct net_device *real_dev; /* The ethernet(like) device @@ -114,12 +115,24 @@ static inline void vlan_group_set_device(struct vlan_group *vg, #define vlan_tx_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +/* Must be invoked with rcu_read_lock or with RTNL. */ +static inline struct net_device *vlan_find_dev(struct net_device *real_dev, + u16 vlan_id) +{ + struct vlan_group *grp = rcu_dereference_rtnl(real_dev->vlgrp); + + if (grp) + return vlan_group_get_device(grp, vlan_id); + + return NULL; +} + extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling); -extern int vlan_hwaccel_do_receive(struct sk_buff *skb); +extern bool vlan_hwaccel_do_receive(struct sk_buff **skb); extern gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci, struct sk_buff *skb); @@ -128,6 +141,12 @@ vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci); #else +static inline struct net_device *vlan_find_dev(struct net_device *real_dev, + u16 vlan_id) +{ + return NULL; +} + static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev) { BUG(); @@ -147,9 +166,11 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, return NET_XMIT_SUCCESS; } -static inline int vlan_hwaccel_do_receive(struct sk_buff *skb) +static inline bool vlan_hwaccel_do_receive(struct sk_buff **skb) { - return 0; + if ((*skb)->vlan_tci & VLAN_VID_MASK) + (*skb)->pkt_type = 
PACKET_OTHERHOST; + return false; } static inline gro_result_t diff --git a/include/linux/in.h b/include/linux/in.h index 41d88a4689af..beeb6dee2b49 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -250,6 +250,25 @@ struct sockaddr_in { #ifdef __KERNEL__ +#include <linux/errno.h> + +static inline int proto_ports_offset(int proto) +{ + switch (proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: /* SPI */ + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + return 0; + case IPPROTO_AH: /* SPI */ + return 4; + default: + return -EINVAL; + } +} + static inline bool ipv4_is_loopback(__be32 addr) { return (addr & htonl(0xff000000)) == htonl(0x7f000000); diff --git a/include/linux/in6.h b/include/linux/in6.h index c4bf46f764bf..097a34b55560 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -268,6 +268,10 @@ struct in6_flowlabel_req { /* RFC5082: Generalized Ttl Security Mechanism */ #define IPV6_MINHOPCOUNT 73 +#define IPV6_ORIGDSTADDR 74 +#define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR +#define IPV6_TRANSPARENT 75 + /* * Multicast Routing: * see include/linux/mroute6.h. 
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 2be1a1a2beb9..ccd5b07d678d 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -9,6 +9,7 @@ #include <linux/rcupdate.h> #include <linux/timer.h> #include <linux/sysctl.h> +#include <linux/rtnetlink.h> enum { @@ -158,7 +159,12 @@ struct in_ifaddr { extern int register_inetaddr_notifier(struct notifier_block *nb); extern int unregister_inetaddr_notifier(struct notifier_block *nb); -extern struct net_device *ip_dev_find(struct net *net, __be32 addr); +extern struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref); +static inline struct net_device *ip_dev_find(struct net *net, __be32 addr) +{ + return __ip_dev_find(net, addr, true); +} + extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); extern int devinet_ioctl(struct net *net, unsigned int cmd, void __user *); extern void devinet_init(void); @@ -198,14 +204,10 @@ static __inline__ int bad_mask(__be32 mask, __be32 addr) static inline struct in_device *__in_dev_get_rcu(const struct net_device *dev) { - struct in_device *in_dev = dev->ip_ptr; - if (in_dev) - in_dev = rcu_dereference(in_dev); - return in_dev; + return rcu_dereference(dev->ip_ptr); } -static __inline__ struct in_device * -in_dev_get(const struct net_device *dev) +static inline struct in_device *in_dev_get(const struct net_device *dev) { struct in_device *in_dev; @@ -217,10 +219,9 @@ in_dev_get(const struct net_device *dev) return in_dev; } -static __inline__ struct in_device * -__in_dev_get_rtnl(const struct net_device *dev) +static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev) { - return (struct in_device*)dev->ip_ptr; + return rcu_dereference_check(dev->ip_ptr, lockdep_rtnl_is_held()); } extern void in_dev_finish_destroy(struct in_device *idev); diff --git a/include/linux/init.h b/include/linux/init.h index de994304e0bb..577671c55153 100644 --- a/include/linux/init.h +++ 
b/include/linux/init.h @@ -46,16 +46,23 @@ #define __exitdata __section(.exit.data) #define __exit_call __used __section(.exitcall.exit) -/* modpost check for section mismatches during the kernel build. +/* + * modpost check for section mismatches during the kernel build. * A section mismatch happens when there are references from a * code or data section to an init section (both code or data). * The init sections are (for most archs) discarded by the kernel * when early init has completed so all such references are potential bugs. * For exit sections the same issue exists. + * * The following markers are used for the cases where the reference to * the *init / *exit section (code or data) is valid and will teach - * modpost not to issue a warning. - * The markers follow same syntax rules as __init / __initdata. */ + * modpost not to issue a warning. Intended semantics is that a code or + * data tagged __ref* can reference code or data from init section without + * producing a warning (of course, no warning does not mean code is + * correct, so optimally document why the __ref is needed and why it's OK). + * + * The markers follow same syntax rules as __init / __initdata. 
+ */ #define __ref __section(.ref.text) noinline #define __refdata __section(.ref.data) #define __refconst __section(.ref.rodata) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 1f43fa56f600..2fea6c8ef6ba 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -82,11 +82,17 @@ extern struct group_info init_groups; # define CAP_INIT_BSET CAP_FULL_SET #ifdef CONFIG_TREE_PREEMPT_RCU +#define INIT_TASK_RCU_TREE_PREEMPT() \ + .rcu_blocked_node = NULL, +#else +#define INIT_TASK_RCU_TREE_PREEMPT(tsk) +#endif +#ifdef CONFIG_PREEMPT_RCU #define INIT_TASK_RCU_PREEMPT(tsk) \ .rcu_read_lock_nesting = 0, \ .rcu_read_unlock_special = 0, \ - .rcu_blocked_node = NULL, \ - .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), + .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ + INIT_TASK_RCU_TREE_PREEMPT() #else #define INIT_TASK_RCU_PREEMPT(tsk) #endif @@ -137,8 +143,8 @@ extern struct cred init_cred; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ - .real_cred = &init_cred, \ - .cred = &init_cred, \ + RCU_INIT_POINTER(.real_cred, &init_cred), \ + RCU_INIT_POINTER(.cred, &init_cred), \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ .comm = "swapper", \ diff --git a/include/linux/input.h b/include/linux/input.h index 896a92227bc4..d6ae1761be97 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1196,7 +1196,7 @@ struct input_dev { int (*flush)(struct input_dev *dev, struct file *file); int (*event)(struct input_dev *dev, unsigned int type, unsigned int code, int value); - struct input_handle *grab; + struct input_handle __rcu *grab; spinlock_t event_lock; struct mutex mutex; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a0384a4d1e6f..01b281646251 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -18,6 +18,7 @@ #include <asm/atomic.h> #include <asm/ptrace.h> #include <asm/system.h> 
+#include <trace/events/irq.h> /* * These correspond to the IORESOURCE_IRQ_* defines in @@ -407,10 +408,14 @@ asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); -#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) +static inline void __raise_softirq_irqoff(unsigned int nr) +{ + trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL); + or_softirq_pending(1UL << nr); +} + extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); -extern void wakeup_softirqd(void); /* This is the worklist that queues up per-cpu softirq work. * @@ -641,11 +646,8 @@ static inline void init_irq_proc(void) struct seq_file; int show_interrupts(struct seq_file *p, void *v); -struct irq_desc; - extern int early_irq_init(void); extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); -extern int arch_init_chip_data(struct irq_desc *desc, int node); #endif diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 64d529133031..3e70b21884a9 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -53,7 +53,7 @@ struct io_context { struct radix_tree_root radix_root; struct hlist_head cic_list; - void *ioc_data; + void __rcu *ioc_data; }; static inline struct io_context *ioc_task_link(struct io_context *ioc) diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index 9708de265bb1..5f43a3b2e3ad 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h @@ -70,6 +70,7 @@ /* * IPVS Connection Flags + * Only flags 0..15 are sent to backup server */ #define IP_VS_CONN_F_FWD_MASK 0x0007 /* mask for the fwd methods */ #define IP_VS_CONN_F_MASQ 0x0000 /* masquerading/NAT */ @@ -88,9 +89,20 @@ #define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ #define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ +/* Flags 
that are not sent to backup server start from bit 16 */ +#define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */ + +/* Connection flags from destination that can be changed by user space */ +#define IP_VS_CONN_F_DEST_MASK (IP_VS_CONN_F_FWD_MASK | \ + IP_VS_CONN_F_ONE_PACKET | \ + IP_VS_CONN_F_NFCT | \ + 0) + #define IP_VS_SCHEDNAME_MAXLEN 16 +#define IP_VS_PENAME_MAXLEN 16 #define IP_VS_IFNAME_MAXLEN 16 +#define IP_VS_PEDATA_MAXLEN 255 /* * The struct ip_vs_service_user and struct ip_vs_dest_user are @@ -324,6 +336,9 @@ enum { IPVS_SVC_ATTR_NETMASK, /* persistent netmask */ IPVS_SVC_ATTR_STATS, /* nested attribute for service stats */ + + IPVS_SVC_ATTR_PE_NAME, /* name of ct retriever */ + __IPVS_SVC_ATTR_MAX, }; diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e62683ba88e6..8e429d0e0405 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -341,7 +341,9 @@ struct ipv6_pinfo { odstopts:1, rxflow:1, rxtclass:1, - rxpmtu:1; + rxpmtu:1, + rxorigdstaddr:1; + /* 2 bits hole */ } bits; __u16 all; } rxopt; diff --git a/include/linux/irq.h b/include/linux/irq.h index c03243ad84b4..e9639115dff1 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -72,6 +72,10 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #define IRQ_ONESHOT 0x08000000 /* IRQ is not unmasked after hardirq */ #define IRQ_NESTED_THREAD 0x10000000 /* IRQ is nested into another, no own handler thread */ +#define IRQF_MODIFY_MASK \ + (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ + IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL) + #ifdef CONFIG_IRQ_PER_CPU # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) # define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) @@ -80,36 +84,77 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, # define IRQ_NO_BALANCING_MASK IRQ_NO_BALANCING #endif -struct proc_dir_entry; struct msi_desc; /** + * struct irq_data - per irq and irq chip data passed down to chip functions + * @irq: interrupt number + * @node: 
node index useful for balancing + * @chip: low level interrupt hardware access + * @handler_data: per-IRQ data for the irq_chip methods + * @chip_data: platform-specific per-chip private data for the chip + * methods, to allow shared chip implementations + * @msi_desc: MSI descriptor + * @affinity: IRQ affinity on SMP + * + * The fields here need to overlay the ones in irq_desc until we + * cleaned up the direct references and switched everything over to + * irq_data. + */ +struct irq_data { + unsigned int irq; + unsigned int node; + struct irq_chip *chip; + void *handler_data; + void *chip_data; + struct msi_desc *msi_desc; +#ifdef CONFIG_SMP + cpumask_var_t affinity; +#endif +}; + +/** * struct irq_chip - hardware interrupt chip descriptor * * @name: name for /proc/interrupts - * @startup: start up the interrupt (defaults to ->enable if NULL) - * @shutdown: shut down the interrupt (defaults to ->disable if NULL) - * @enable: enable the interrupt (defaults to chip->unmask if NULL) - * @disable: disable the interrupt - * @ack: start of a new interrupt - * @mask: mask an interrupt source - * @mask_ack: ack and mask an interrupt source - * @unmask: unmask an interrupt source - * @eoi: end of interrupt - chip level - * @end: end of interrupt - flow level - * @set_affinity: set the CPU affinity on SMP machines - * @retrigger: resend an IRQ to the CPU - * @set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) 
of an IRQ - * @set_wake: enable/disable power-management wake-on of an IRQ + * @startup: deprecated, replaced by irq_startup + * @shutdown: deprecated, replaced by irq_shutdown + * @enable: deprecated, replaced by irq_enable + * @disable: deprecated, replaced by irq_disable + * @ack: deprecated, replaced by irq_ack + * @mask: deprecated, replaced by irq_mask + * @mask_ack: deprecated, replaced by irq_mask_ack + * @unmask: deprecated, replaced by irq_unmask + * @eoi: deprecated, replaced by irq_eoi + * @end: deprecated, will go away with __do_IRQ() + * @set_affinity: deprecated, replaced by irq_set_affinity + * @retrigger: deprecated, replaced by irq_retrigger + * @set_type: deprecated, replaced by irq_set_type + * @set_wake: deprecated, replaced by irq_wake + * @bus_lock: deprecated, replaced by irq_bus_lock + * @bus_sync_unlock: deprecated, replaced by irq_bus_sync_unlock * - * @bus_lock: function to lock access to slow bus (i2c) chips - * @bus_sync_unlock: function to sync and unlock slow bus (i2c) chips + * @irq_startup: start up the interrupt (defaults to ->enable if NULL) + * @irq_shutdown: shut down the interrupt (defaults to ->disable if NULL) + * @irq_enable: enable the interrupt (defaults to chip->unmask if NULL) + * @irq_disable: disable the interrupt + * @irq_ack: start of a new interrupt + * @irq_mask: mask an interrupt source + * @irq_mask_ack: ack and mask an interrupt source + * @irq_unmask: unmask an interrupt source + * @irq_eoi: end of interrupt + * @irq_set_affinity: set the CPU affinity on SMP machines + * @irq_retrigger: resend an IRQ to the CPU + * @irq_set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) 
of an IRQ + * @irq_set_wake: enable/disable power-management wake-on of an IRQ + * @irq_bus_lock: function to lock access to slow bus (i2c) chips + * @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips * * @release: release function solely used by UML - * @typename: obsoleted by name, kept as migration helper */ struct irq_chip { const char *name; +#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED unsigned int (*startup)(unsigned int irq); void (*shutdown)(unsigned int irq); void (*enable)(unsigned int irq); @@ -130,154 +175,66 @@ struct irq_chip { void (*bus_lock)(unsigned int irq); void (*bus_sync_unlock)(unsigned int irq); +#endif + unsigned int (*irq_startup)(struct irq_data *data); + void (*irq_shutdown)(struct irq_data *data); + void (*irq_enable)(struct irq_data *data); + void (*irq_disable)(struct irq_data *data); + + void (*irq_ack)(struct irq_data *data); + void (*irq_mask)(struct irq_data *data); + void (*irq_mask_ack)(struct irq_data *data); + void (*irq_unmask)(struct irq_data *data); + void (*irq_eoi)(struct irq_data *data); + + int (*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force); + int (*irq_retrigger)(struct irq_data *data); + int (*irq_set_type)(struct irq_data *data, unsigned int flow_type); + int (*irq_set_wake)(struct irq_data *data, unsigned int on); + + void (*irq_bus_lock)(struct irq_data *data); + void (*irq_bus_sync_unlock)(struct irq_data *data); /* Currently used only by UML, might disappear one day.*/ #ifdef CONFIG_IRQ_RELEASE_METHOD void (*release)(unsigned int irq, void *dev_id); #endif - /* - * For compatibility, ->typename is copied into ->name. - * Will disappear. 
- */ - const char *typename; }; -struct timer_rand_state; -struct irq_2_iommu; -/** - * struct irq_desc - interrupt descriptor - * @irq: interrupt number for this descriptor - * @timer_rand_state: pointer to timer rand state struct - * @kstat_irqs: irq stats per cpu - * @irq_2_iommu: iommu with this irq - * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] - * @chip: low level interrupt hardware access - * @msi_desc: MSI descriptor - * @handler_data: per-IRQ data for the irq_chip methods - * @chip_data: platform-specific per-chip private data for the chip - * methods, to allow shared chip implementations - * @action: the irq action chain - * @status: status information - * @depth: disable-depth, for nested irq_disable() calls - * @wake_depth: enable depth, for multiple set_irq_wake() callers - * @irq_count: stats field to detect stalled irqs - * @last_unhandled: aging timer for unhandled count - * @irqs_unhandled: stats field for spurious unhandled interrupts - * @lock: locking for SMP - * @affinity: IRQ affinity on SMP - * @node: node index useful for balancing - * @pending_mask: pending rebalanced interrupts - * @threads_active: number of irqaction threads currently running - * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers - * @dir: /proc/irq/ procfs entry - * @name: flow handler name for /proc/interrupts output - */ -struct irq_desc { - unsigned int irq; - struct timer_rand_state *timer_rand_state; - unsigned int *kstat_irqs; -#ifdef CONFIG_INTR_REMAP - struct irq_2_iommu *irq_2_iommu; -#endif - irq_flow_handler_t handle_irq; - struct irq_chip *chip; - struct msi_desc *msi_desc; - void *handler_data; - void *chip_data; - struct irqaction *action; /* IRQ action list */ - unsigned int status; /* IRQ status */ - - unsigned int depth; /* nested irq disables */ - unsigned int wake_depth; /* nested wake enables */ - unsigned int irq_count; /* For detecting broken IRQs */ - unsigned long last_unhandled; /* Aging timer for 
unhandled count */ - unsigned int irqs_unhandled; - raw_spinlock_t lock; -#ifdef CONFIG_SMP - cpumask_var_t affinity; - const struct cpumask *affinity_hint; - unsigned int node; -#ifdef CONFIG_GENERIC_PENDING_IRQ - cpumask_var_t pending_mask; -#endif -#endif - atomic_t threads_active; - wait_queue_head_t wait_for_threads; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *dir; -#endif - const char *name; -} ____cacheline_internodealigned_in_smp; +/* This include will go away once we isolated irq_desc usage to core code */ +#include <linux/irqdesc.h> -extern void arch_init_copy_chip_data(struct irq_desc *old_desc, - struct irq_desc *desc, int node); -extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); +/* + * Pick up the arch-dependent methods: + */ +#include <asm/hw_irq.h> -#ifndef CONFIG_SPARSE_IRQ -extern struct irq_desc irq_desc[NR_IRQS]; +#ifndef NR_IRQS_LEGACY +# define NR_IRQS_LEGACY 0 #endif -#ifdef CONFIG_NUMA_IRQ_DESC -extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node); -#else -static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) -{ - return desc; -} +#ifndef ARCH_IRQ_INIT_FLAGS +# define ARCH_IRQ_INIT_FLAGS 0 #endif -extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); - -/* - * Pick up the arch-dependent methods: - */ -#include <asm/hw_irq.h> +#define IRQ_DEFAULT_INIT_FLAGS (IRQ_DISABLED | ARCH_IRQ_INIT_FLAGS) +struct irqaction; extern int setup_irq(unsigned int irq, struct irqaction *new); extern void remove_irq(unsigned int irq, struct irqaction *act); #ifdef CONFIG_GENERIC_HARDIRQS -#ifdef CONFIG_SMP - -#ifdef CONFIG_GENERIC_PENDING_IRQ - +#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) void move_native_irq(int irq); void move_masked_irq(int irq); - -#else /* CONFIG_GENERIC_PENDING_IRQ */ - -static inline void move_irq(int irq) -{ -} - -static inline void move_native_irq(int irq) -{ -} - -static inline void move_masked_irq(int irq) -{ -} 
- -#endif /* CONFIG_GENERIC_PENDING_IRQ */ - -#else /* CONFIG_SMP */ - -#define move_native_irq(x) -#define move_masked_irq(x) - -#endif /* CONFIG_SMP */ +#else +static inline void move_native_irq(int irq) { } +static inline void move_masked_irq(int irq) { } +#endif extern int no_irq_affinity; -static inline int irq_balancing_disabled(unsigned int irq) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - return desc->status & IRQ_NO_BALANCING_MASK; -} - /* Handle irq action chains: */ extern irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action); @@ -293,42 +250,10 @@ extern void handle_percpu_irq(unsigned int irq, struct irq_desc *desc); extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc); extern void handle_nested_irq(unsigned int irq); -/* - * Monolithic do_IRQ implementation. - */ -#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ -extern unsigned int __do_IRQ(unsigned int irq); -#endif - -/* - * Architectures call this to let the generic IRQ layer - * handle an interrupt. If the descriptor is attached to an - * irqchip-style controller then we call the ->handle_irq() handler, - * and it calls __do_IRQ() if it's attached to an irqtype-style controller. 
- */ -static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc) -{ -#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ - desc->handle_irq(irq, desc); -#else - if (likely(desc->handle_irq)) - desc->handle_irq(irq, desc); - else - __do_IRQ(irq); -#endif -} - -static inline void generic_handle_irq(unsigned int irq) -{ - generic_handle_irq_desc(irq, irq_to_desc(irq)); -} - /* Handling of unhandled and spurious interrupts: */ extern void note_interrupt(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret); -/* Resending of interrupts :*/ -void check_irq_resend(struct irq_desc *desc, unsigned int irq); /* Enable/disable irq debugging output: */ extern int noirqdebug_setup(char *str); @@ -351,16 +276,6 @@ extern void __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, const char *name); -/* caller has locked the irq_desc and both params are valid */ -static inline void __set_irq_handler_unlocked(int irq, - irq_flow_handler_t handler) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - desc->handle_irq = handler; -} - /* * Set a highlevel flow handler for a given IRQ: */ @@ -384,141 +299,121 @@ set_irq_chained_handler(unsigned int irq, extern void set_irq_nested_thread(unsigned int irq, int nest); -extern void set_irq_noprobe(unsigned int irq); -extern void set_irq_probe(unsigned int irq); +void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set); + +static inline void irq_set_status_flags(unsigned int irq, unsigned long set) +{ + irq_modify_status(irq, 0, set); +} + +static inline void irq_clear_status_flags(unsigned int irq, unsigned long clr) +{ + irq_modify_status(irq, clr, 0); +} + +static inline void set_irq_noprobe(unsigned int irq) +{ + irq_modify_status(irq, 0, IRQ_NOPROBE); +} + +static inline void set_irq_probe(unsigned int irq) +{ + irq_modify_status(irq, IRQ_NOPROBE, 0); +} /* Handle dynamic irq creation and destruction */ extern unsigned int create_irq_nr(unsigned int 
irq_want, int node); extern int create_irq(void); extern void destroy_irq(unsigned int irq); -/* Test to see if a driver has successfully requested an irq */ -static inline int irq_has_action(unsigned int irq) +/* + * Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and + * irq_free_desc instead. + */ +extern void dynamic_irq_cleanup(unsigned int irq); +static inline void dynamic_irq_init(unsigned int irq) { - struct irq_desc *desc = irq_to_desc(irq); - return desc->action != NULL; + dynamic_irq_cleanup(irq); } -/* Dynamic irq helper functions */ -extern void dynamic_irq_init(unsigned int irq); -void dynamic_irq_init_keep_chip_data(unsigned int irq); -extern void dynamic_irq_cleanup(unsigned int irq); -void dynamic_irq_cleanup_keep_chip_data(unsigned int irq); - /* Set/get chip/data for an IRQ: */ extern int set_irq_chip(unsigned int irq, struct irq_chip *chip); extern int set_irq_data(unsigned int irq, void *data); extern int set_irq_chip_data(unsigned int irq, void *data); extern int set_irq_type(unsigned int irq, unsigned int type); extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); +extern struct irq_data *irq_get_irq_data(unsigned int irq); -#define get_irq_chip(irq) (irq_to_desc(irq)->chip) -#define get_irq_chip_data(irq) (irq_to_desc(irq)->chip_data) -#define get_irq_data(irq) (irq_to_desc(irq)->handler_data) -#define get_irq_msi(irq) (irq_to_desc(irq)->msi_desc) - -#define get_irq_desc_chip(desc) ((desc)->chip) -#define get_irq_desc_chip_data(desc) ((desc)->chip_data) -#define get_irq_desc_data(desc) ((desc)->handler_data) -#define get_irq_desc_msi(desc) ((desc)->msi_desc) - -#endif /* CONFIG_GENERIC_HARDIRQS */ - -#endif /* !CONFIG_S390 */ - -#ifdef CONFIG_SMP -/** - * alloc_desc_masks - allocate cpumasks for irq_desc - * @desc: pointer to irq_desc struct - * @node: node which will be handling the cpumasks - * @boot: true if need bootmem - * - * Allocates affinity and pending_mask cpumask if required. 
- * Returns true if successful (or not required). - */ -static inline bool alloc_desc_masks(struct irq_desc *desc, int node, - bool boot) +static inline struct irq_chip *get_irq_chip(unsigned int irq) { - gfp_t gfp = GFP_ATOMIC; - - if (boot) - gfp = GFP_NOWAIT; - -#ifdef CONFIG_CPUMASK_OFFSTACK - if (!alloc_cpumask_var_node(&desc->affinity, gfp, node)) - return false; + struct irq_data *d = irq_get_irq_data(irq); + return d ? d->chip : NULL; +} -#ifdef CONFIG_GENERIC_PENDING_IRQ - if (!alloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { - free_cpumask_var(desc->affinity); - return false; - } -#endif -#endif - return true; +static inline struct irq_chip *irq_data_get_irq_chip(struct irq_data *d) +{ + return d->chip; } -static inline void init_desc_masks(struct irq_desc *desc) +static inline void *get_irq_chip_data(unsigned int irq) { - cpumask_setall(desc->affinity); -#ifdef CONFIG_GENERIC_PENDING_IRQ - cpumask_clear(desc->pending_mask); -#endif + struct irq_data *d = irq_get_irq_data(irq); + return d ? d->chip_data : NULL; } -/** - * init_copy_desc_masks - copy cpumasks for irq_desc - * @old_desc: pointer to old irq_desc struct - * @new_desc: pointer to new irq_desc struct - * - * Insures affinity and pending_masks are copied to new irq_desc. - * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the - * irq_desc struct so the copy is redundant. - */ +static inline void *irq_data_get_irq_chip_data(struct irq_data *d) +{ + return d->chip_data; +} -static inline void init_copy_desc_masks(struct irq_desc *old_desc, - struct irq_desc *new_desc) +static inline void *get_irq_data(unsigned int irq) { -#ifdef CONFIG_CPUMASK_OFFSTACK - cpumask_copy(new_desc->affinity, old_desc->affinity); + struct irq_data *d = irq_get_irq_data(irq); + return d ? 
d->handler_data : NULL; +} -#ifdef CONFIG_GENERIC_PENDING_IRQ - cpumask_copy(new_desc->pending_mask, old_desc->pending_mask); -#endif -#endif +static inline void *irq_data_get_irq_data(struct irq_data *d) +{ + return d->handler_data; } -static inline void free_desc_masks(struct irq_desc *old_desc, - struct irq_desc *new_desc) +static inline struct msi_desc *get_irq_msi(unsigned int irq) { - free_cpumask_var(old_desc->affinity); + struct irq_data *d = irq_get_irq_data(irq); + return d ? d->msi_desc : NULL; +} -#ifdef CONFIG_GENERIC_PENDING_IRQ - free_cpumask_var(old_desc->pending_mask); -#endif +static inline struct msi_desc *irq_data_get_msi(struct irq_data *d) +{ + return d->msi_desc; } -#else /* !CONFIG_SMP */ +int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node); +void irq_free_descs(unsigned int irq, unsigned int cnt); +int irq_reserve_irqs(unsigned int from, unsigned int cnt); -static inline bool alloc_desc_masks(struct irq_desc *desc, int node, - bool boot) +static inline int irq_alloc_desc(int node) { - return true; + return irq_alloc_descs(-1, 0, 1, node); } -static inline void init_desc_masks(struct irq_desc *desc) +static inline int irq_alloc_desc_at(unsigned int at, int node) { + return irq_alloc_descs(at, at, 1, node); } -static inline void init_copy_desc_masks(struct irq_desc *old_desc, - struct irq_desc *new_desc) +static inline int irq_alloc_desc_from(unsigned int from, int node) { + return irq_alloc_descs(-1, from, 1, node); } -static inline void free_desc_masks(struct irq_desc *old_desc, - struct irq_desc *new_desc) +static inline void irq_free_desc(unsigned int irq) { + irq_free_descs(irq, 1); } -#endif /* CONFIG_SMP */ + +#endif /* CONFIG_GENERIC_HARDIRQS */ + +#endif /* !CONFIG_S390 */ #endif /* _LINUX_IRQ_H */ diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h new file mode 100644 index 000000000000..4fa09d4d0b71 --- /dev/null +++ b/include/linux/irq_work.h @@ -0,0 +1,20 @@ +#ifndef _LINUX_IRQ_WORK_H 
+#define _LINUX_IRQ_WORK_H + +struct irq_work { + struct irq_work *next; + void (*func)(struct irq_work *); +}; + +static inline +void init_irq_work(struct irq_work *entry, void (*func)(struct irq_work *)) +{ + entry->next = NULL; + entry->func = func; +} + +bool irq_work_queue(struct irq_work *entry); +void irq_work_run(void); +void irq_work_sync(struct irq_work *entry); + +#endif /* _LINUX_IRQ_WORK_H */ diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h new file mode 100644 index 000000000000..979c68cc7458 --- /dev/null +++ b/include/linux/irqdesc.h @@ -0,0 +1,159 @@ +#ifndef _LINUX_IRQDESC_H +#define _LINUX_IRQDESC_H + +/* + * Core internal functions to deal with irq descriptors + * + * This include will move to kernel/irq once we cleaned up the tree. + * For now it's included from <linux/irq.h> + */ + +struct proc_dir_entry; +struct timer_rand_state; +/** + * struct irq_desc - interrupt descriptor + * @irq_data: per irq and chip data passed down to chip functions + * @timer_rand_state: pointer to timer rand state struct + * @kstat_irqs: irq stats per cpu + * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] + * @action: the irq action chain + * @status: status information + * @depth: disable-depth, for nested irq_disable() calls + * @wake_depth: enable depth, for multiple set_irq_wake() callers + * @irq_count: stats field to detect stalled irqs + * @last_unhandled: aging timer for unhandled count + * @irqs_unhandled: stats field for spurious unhandled interrupts + * @lock: locking for SMP + * @pending_mask: pending rebalanced interrupts + * @threads_active: number of irqaction threads currently running + * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers + * @dir: /proc/irq/ procfs entry + * @name: flow handler name for /proc/interrupts output + */ +struct irq_desc { + +#ifdef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED + struct irq_data irq_data; +#else + /* + * This union will go away, once we fixed the direct 
access to + * irq_desc all over the place. The direct fields are a 1:1 + * overlay of irq_data. + */ + union { + struct irq_data irq_data; + struct { + unsigned int irq; + unsigned int node; + struct irq_chip *chip; + void *handler_data; + void *chip_data; + struct msi_desc *msi_desc; +#ifdef CONFIG_SMP + cpumask_var_t affinity; +#endif + }; + }; +#endif + + struct timer_rand_state *timer_rand_state; + unsigned int *kstat_irqs; + irq_flow_handler_t handle_irq; + struct irqaction *action; /* IRQ action list */ + unsigned int status; /* IRQ status */ + + unsigned int depth; /* nested irq disables */ + unsigned int wake_depth; /* nested wake enables */ + unsigned int irq_count; /* For detecting broken IRQs */ + unsigned long last_unhandled; /* Aging timer for unhandled count */ + unsigned int irqs_unhandled; + raw_spinlock_t lock; +#ifdef CONFIG_SMP + const struct cpumask *affinity_hint; +#ifdef CONFIG_GENERIC_PENDING_IRQ + cpumask_var_t pending_mask; +#endif +#endif + atomic_t threads_active; + wait_queue_head_t wait_for_threads; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *dir; +#endif + const char *name; +} ____cacheline_internodealigned_in_smp; + +#ifndef CONFIG_SPARSE_IRQ +extern struct irq_desc irq_desc[NR_IRQS]; +#endif + +/* Will be removed once the last users in power and sh are gone */ +extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); +static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) +{ + return desc; +} + +#ifdef CONFIG_GENERIC_HARDIRQS + +#define get_irq_desc_chip(desc) ((desc)->irq_data.chip) +#define get_irq_desc_chip_data(desc) ((desc)->irq_data.chip_data) +#define get_irq_desc_data(desc) ((desc)->irq_data.handler_data) +#define get_irq_desc_msi(desc) ((desc)->irq_data.msi_desc) + +/* + * Monolithic do_IRQ implementation. 
+ */ +#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ +extern unsigned int __do_IRQ(unsigned int irq); +#endif + +/* + * Architectures call this to let the generic IRQ layer + * handle an interrupt. If the descriptor is attached to an + * irqchip-style controller then we call the ->handle_irq() handler, + * and it calls __do_IRQ() if it's attached to an irqtype-style controller. + */ +static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc) +{ +#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ + desc->handle_irq(irq, desc); +#else + if (likely(desc->handle_irq)) + desc->handle_irq(irq, desc); + else + __do_IRQ(irq); +#endif +} + +static inline void generic_handle_irq(unsigned int irq) +{ + generic_handle_irq_desc(irq, irq_to_desc(irq)); +} + +/* Test to see if a driver has successfully requested an irq */ +static inline int irq_has_action(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + return desc->action != NULL; +} + +static inline int irq_balancing_disabled(unsigned int irq) +{ + struct irq_desc *desc; + + desc = irq_to_desc(irq); + return desc->status & IRQ_NO_BALANCING_MASK; +} + +/* caller has locked the irq_desc and both params are valid */ +static inline void __set_irq_handler_unlocked(int irq, + irq_flow_handler_t handler) +{ + struct irq_desc *desc; + + desc = irq_to_desc(irq); + desc->handle_irq = handler; +} +#endif + +#endif diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 006bf45eae30..d176d658fe25 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -12,6 +12,7 @@ #define _LINUX_TRACE_IRQFLAGS_H #include <linux/typecheck.h> +#include <asm/irqflags.h> #ifdef CONFIG_TRACE_IRQFLAGS extern void trace_softirqs_on(unsigned long ip); @@ -52,17 +53,45 @@ # define start_critical_timings() do { } while (0) #endif -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT - -#include <asm/irqflags.h> +/* + * Wrap the arch provided IRQ routines to provide appropriate checks. 
+ */ +#define raw_local_irq_disable() arch_local_irq_disable() +#define raw_local_irq_enable() arch_local_irq_enable() +#define raw_local_irq_save(flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = arch_local_irq_save(); \ + } while (0) +#define raw_local_irq_restore(flags) \ + do { \ + typecheck(unsigned long, flags); \ + arch_local_irq_restore(flags); \ + } while (0) +#define raw_local_save_flags(flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = arch_local_save_flags(); \ + } while (0) +#define raw_irqs_disabled_flags(flags) \ + ({ \ + typecheck(unsigned long, flags); \ + arch_irqs_disabled_flags(flags); \ + }) +#define raw_irqs_disabled() (arch_irqs_disabled()) +#define raw_safe_halt() arch_safe_halt() +/* + * The local_irq_*() APIs are equal to the raw_local_irq*() + * if !TRACE_IRQFLAGS. + */ +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT #define local_irq_enable() \ do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) #define local_irq_disable() \ do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0) #define local_irq_save(flags) \ do { \ - typecheck(unsigned long, flags); \ raw_local_irq_save(flags); \ trace_hardirqs_off(); \ } while (0) @@ -70,7 +99,6 @@ #define local_irq_restore(flags) \ do { \ - typecheck(unsigned long, flags); \ if (raw_irqs_disabled_flags(flags)) { \ raw_local_irq_restore(flags); \ trace_hardirqs_off(); \ @@ -79,51 +107,44 @@ raw_local_irq_restore(flags); \ } \ } while (0) -#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */ -/* - * The local_irq_*() APIs are equal to the raw_local_irq*() - * if !TRACE_IRQFLAGS. 
- */ -# define raw_local_irq_disable() local_irq_disable() -# define raw_local_irq_enable() local_irq_enable() -# define raw_local_irq_save(flags) \ - do { \ - typecheck(unsigned long, flags); \ - local_irq_save(flags); \ - } while (0) -# define raw_local_irq_restore(flags) \ +#define local_save_flags(flags) \ do { \ - typecheck(unsigned long, flags); \ - local_irq_restore(flags); \ + raw_local_save_flags(flags); \ } while (0) -#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT -#define safe_halt() \ - do { \ - trace_hardirqs_on(); \ - raw_safe_halt(); \ - } while (0) +#define irqs_disabled_flags(flags) \ + ({ \ + raw_irqs_disabled_flags(flags); \ + }) -#define local_save_flags(flags) \ - do { \ - typecheck(unsigned long, flags); \ - raw_local_save_flags(flags); \ +#define irqs_disabled() \ + ({ \ + unsigned long _flags; \ + raw_local_save_flags(_flags); \ + raw_irqs_disabled_flags(_flags); \ + }) + +#define safe_halt() \ + do { \ + trace_hardirqs_on(); \ + raw_safe_halt(); \ } while (0) -#define irqs_disabled() \ -({ \ - unsigned long _flags; \ - \ - raw_local_save_flags(_flags); \ - raw_irqs_disabled_flags(_flags); \ -}) -#define irqs_disabled_flags(flags) \ -({ \ - typecheck(unsigned long, flags); \ - raw_irqs_disabled_flags(flags); \ -}) +#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */ + +#define local_irq_enable() do { raw_local_irq_enable(); } while (0) +#define local_irq_disable() do { raw_local_irq_disable(); } while (0) +#define local_irq_save(flags) \ + do { \ + raw_local_irq_save(flags); \ + } while (0) +#define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0) +#define local_save_flags(flags) do { raw_local_save_flags(flags); } while (0) +#define irqs_disabled() (raw_irqs_disabled()) +#define irqs_disabled_flags(flags) (raw_irqs_disabled_flags(flags)) +#define safe_halt() do { raw_safe_halt(); } while (0) + #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ #endif diff --git a/include/linux/irqnr.h 
b/include/linux/irqnr.h index 7bf89bc8cbca..05aa8c23483f 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -25,6 +25,7 @@ extern int nr_irqs; extern struct irq_desc *irq_to_desc(unsigned int irq); +unsigned int irq_get_next_irq(unsigned int offset); # define for_each_irq_desc(irq, desc) \ for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ @@ -47,6 +48,10 @@ extern struct irq_desc *irq_to_desc(unsigned int irq); #define irq_node(irq) 0 #endif +# define for_each_active_irq(irq) \ + for (irq = irq_get_next_irq(0); irq < nr_irqs; \ + irq = irq_get_next_irq(irq + 1)) + #endif /* CONFIG_GENERIC_HARDIRQS */ #define for_each_irq_nr(irq) \ diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h new file mode 100644 index 000000000000..b67cb180e6e9 --- /dev/null +++ b/include/linux/jump_label.h @@ -0,0 +1,74 @@ +#ifndef _LINUX_JUMP_LABEL_H +#define _LINUX_JUMP_LABEL_H + +#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL) +# include <asm/jump_label.h> +# define HAVE_JUMP_LABEL +#endif + +enum jump_label_type { + JUMP_LABEL_ENABLE, + JUMP_LABEL_DISABLE +}; + +struct module; + +#ifdef HAVE_JUMP_LABEL + +extern struct jump_entry __start___jump_table[]; +extern struct jump_entry __stop___jump_table[]; + +extern void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type); +extern void arch_jump_label_text_poke_early(jump_label_t addr); +extern void jump_label_update(unsigned long key, enum jump_label_type type); +extern void jump_label_apply_nops(struct module *mod); +extern int jump_label_text_reserved(void *start, void *end); + +#define jump_label_enable(key) \ + jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE); + +#define jump_label_disable(key) \ + jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE); + +#else + +#define JUMP_LABEL(key, label) \ +do { \ + if (unlikely(*key)) \ + goto label; \ +} while (0) + +#define jump_label_enable(cond_var) \ +do { \ + *(cond_var) = 1; \ +} while (0) 
+ +#define jump_label_disable(cond_var) \ +do { \ + *(cond_var) = 0; \ +} while (0) + +static inline int jump_label_apply_nops(struct module *mod) +{ + return 0; +} + +static inline int jump_label_text_reserved(void *start, void *end) +{ + return 0; +} + +#endif + +#define COND_STMT(key, stmt) \ +do { \ + __label__ jl_enabled; \ + JUMP_LABEL(key, jl_enabled); \ + if (0) { \ +jl_enabled: \ + stmt; \ + } \ +} while (0) + +#endif diff --git a/include/linux/jump_label_ref.h b/include/linux/jump_label_ref.h new file mode 100644 index 000000000000..e5d012ad92c6 --- /dev/null +++ b/include/linux/jump_label_ref.h @@ -0,0 +1,44 @@ +#ifndef _LINUX_JUMP_LABEL_REF_H +#define _LINUX_JUMP_LABEL_REF_H + +#include <linux/jump_label.h> +#include <asm/atomic.h> + +#ifdef HAVE_JUMP_LABEL + +static inline void jump_label_inc(atomic_t *key) +{ + if (atomic_add_return(1, key) == 1) + jump_label_enable(key); +} + +static inline void jump_label_dec(atomic_t *key) +{ + if (atomic_dec_and_test(key)) + jump_label_disable(key); +} + +#else /* !HAVE_JUMP_LABEL */ + +static inline void jump_label_inc(atomic_t *key) +{ + atomic_inc(key); +} + +static inline void jump_label_dec(atomic_t *key) +{ + atomic_dec(key); +} + +#undef JUMP_LABEL +#define JUMP_LABEL(key, label) \ +do { \ + if (unlikely(__builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(key), atomic_t *), \ + atomic_read((atomic_t *)(key)), *(key)))) \ + goto label; \ +} while (0) + +#endif /* HAVE_JUMP_LABEL */ + +#endif /* _LINUX_JUMP_LABEL_REF_H */ diff --git a/include/linux/kdb.h b/include/linux/kdb.h index ea6e5244ed3f..aadff7cc2b84 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -28,6 +28,41 @@ extern int kdb_poll_idx; extern int kdb_initial_cpu; extern atomic_t kdb_event; +/* Types and messages used for dynamically added kdb shell commands */ + +#define KDB_MAXARGS 16 /* Maximum number of arguments to a function */ + +typedef enum { + KDB_REPEAT_NONE = 0, /* Do not repeat this command */ + 
KDB_REPEAT_NO_ARGS, /* Repeat the command without arguments */ + KDB_REPEAT_WITH_ARGS, /* Repeat the command including its arguments */ +} kdb_repeat_t; + +typedef int (*kdb_func_t)(int, const char **); + +/* KDB return codes from a command or internal kdb function */ +#define KDB_NOTFOUND (-1) +#define KDB_ARGCOUNT (-2) +#define KDB_BADWIDTH (-3) +#define KDB_BADRADIX (-4) +#define KDB_NOTENV (-5) +#define KDB_NOENVVALUE (-6) +#define KDB_NOTIMP (-7) +#define KDB_ENVFULL (-8) +#define KDB_ENVBUFFULL (-9) +#define KDB_TOOMANYBPT (-10) +#define KDB_TOOMANYDBREGS (-11) +#define KDB_DUPBPT (-12) +#define KDB_BPTNOTFOUND (-13) +#define KDB_BADMODE (-14) +#define KDB_BADINT (-15) +#define KDB_INVADDRFMT (-16) +#define KDB_BADREG (-17) +#define KDB_BADCPUNUM (-18) +#define KDB_BADLENGTH (-19) +#define KDB_NOBP (-20) +#define KDB_BADADDR (-21) + /* * kdb_diemsg * @@ -104,10 +139,26 @@ int kdb_process_cpu(const struct task_struct *p) /* kdb access to register set for stack dumping */ extern struct pt_regs *kdb_current_regs; +#ifdef CONFIG_KALLSYMS +extern const char *kdb_walk_kallsyms(loff_t *pos); +#else /* ! CONFIG_KALLSYMS */ +static inline const char *kdb_walk_kallsyms(loff_t *pos) +{ + return NULL; +} +#endif /* ! CONFIG_KALLSYMS */ +/* Dynamic kdb shell command registration */ +extern int kdb_register(char *, kdb_func_t, char *, char *, short); +extern int kdb_register_repeat(char *, kdb_func_t, char *, char *, + short, kdb_repeat_t); +extern int kdb_unregister(char *); #else /* ! CONFIG_KGDB_KDB */ #define kdb_printf(...) #define kdb_init(x) +#define kdb_register(...) +#define kdb_register_repeat(...) 
+#define kdb_uregister(x) #endif /* CONFIG_KGDB_KDB */ enum { KDB_NOT_INITIALIZED, diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2b0a35e6bc69..edef168a0406 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -58,7 +58,18 @@ extern const char linux_proc_banner[]; #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) -#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) +#define roundup(x, y) ( \ +{ \ + typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ +} \ +) +#define rounddown(x, y) ( \ +{ \ + typeof(x) __x = (x); \ + __x - (__x % (y)); \ +} \ +) #define DIV_ROUND_CLOSEST(x, divisor)( \ { \ typeof(divisor) __divisor = divisor; \ @@ -641,6 +652,16 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } _max1 > _max2 ? _max1 : _max2; }) /** + * min_not_zero - return the minimum that is _not_ zero, unless both are zero + * @x: value1 + * @y: value2 + */ +#define min_not_zero(x, y) ({ \ + typeof(x) __x = (x); \ + typeof(y) __y = (y); \ + __x == 0 ? __y : ((__y == 0) ? 
__x : min(__x, __y)); }) + +/** * clamp - return a value clamped to a given range with strict typechecking * @val: current value * @min: minimum allowable value diff --git a/include/linux/key.h b/include/linux/key.h index cd50dfa1d4c2..3db0adce1fda 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -178,8 +178,9 @@ struct key { */ union { unsigned long value; + void __rcu *rcudata; void *data; - struct keyring_list *subscriptions; + struct keyring_list __rcu *subscriptions; } payload; }; diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 7950a37a7146..8f6d12151048 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -191,6 +191,8 @@ static inline struct kobj_type *get_ktype(struct kobject *kobj) } extern struct kobject *kset_find_obj(struct kset *, const char *); +extern struct kobject *kset_find_obj_hinted(struct kset *, const char *, + struct kobject *); /* The global /sys/kernel/ kobject for people to chain off of */ extern struct kobject *kernel_kobj; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c13cc48697aa..ac740b26eb10 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -205,7 +205,7 @@ struct kvm { struct mutex irq_lock; #ifdef CONFIG_HAVE_KVM_IRQCHIP - struct kvm_irq_routing_table *irq_routing; + struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head mask_notifier_list; struct hlist_head irq_ack_notifier_list; #endif diff --git a/include/linux/libata.h b/include/linux/libata.h index 45fb2967b66d..15b77b8dc7e1 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -37,6 +37,7 @@ #include <scsi/scsi_host.h> #include <linux/acpi.h> #include <linux/cdrom.h> +#include <linux/sched.h> /* * Define if arch has non-standard setup. 
This is a _PCI_ standard @@ -172,6 +173,7 @@ enum { ATA_LFLAG_NO_RETRY = (1 << 5), /* don't retry this link */ ATA_LFLAG_DISABLED = (1 << 6), /* link is disabled */ ATA_LFLAG_SW_ACTIVITY = (1 << 7), /* keep activity stats */ + ATA_LFLAG_NO_LPM = (1 << 8), /* disable LPM on this link */ /* struct ata_port flags */ ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */ @@ -196,7 +198,7 @@ enum { ATA_FLAG_ACPI_SATA = (1 << 17), /* need native SATA ACPI layout */ ATA_FLAG_AN = (1 << 18), /* controller supports AN */ ATA_FLAG_PMP = (1 << 19), /* controller supports PMP */ - ATA_FLAG_IPM = (1 << 20), /* driver can handle IPM */ + ATA_FLAG_LPM = (1 << 20), /* driver can handle LPM */ ATA_FLAG_EM = (1 << 21), /* driver supports enclosure * management */ ATA_FLAG_SW_ACTIVITY = (1 << 22), /* driver supports sw activity @@ -324,12 +326,11 @@ enum { ATA_EH_HARDRESET = (1 << 2), /* meaningful only in ->prereset */ ATA_EH_RESET = ATA_EH_SOFTRESET | ATA_EH_HARDRESET, ATA_EH_ENABLE_LINK = (1 << 3), - ATA_EH_LPM = (1 << 4), /* link power management action */ ATA_EH_PARK = (1 << 5), /* unload heads and stop I/O */ ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK, ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET | - ATA_EH_ENABLE_LINK | ATA_EH_LPM, + ATA_EH_ENABLE_LINK, /* ata_eh_info->flags */ ATA_EHI_HOTPLUGGED = (1 << 0), /* could have been hotplugged */ @@ -341,7 +342,7 @@ enum { ATA_EHI_DID_HARDRESET = (1 << 17), /* already soft-reset this port */ ATA_EHI_PRINTINFO = (1 << 18), /* print configuration info */ ATA_EHI_SETMODE = (1 << 19), /* configure transfer mode */ - ATA_EHI_POST_SETMODE = (1 << 20), /* revaildating after setmode */ + ATA_EHI_POST_SETMODE = (1 << 20), /* revalidating after setmode */ ATA_EHI_DID_RESET = ATA_EHI_DID_SOFTRESET | ATA_EHI_DID_HARDRESET, @@ -377,7 +378,6 @@ enum { ATA_HORKAGE_BROKEN_HPA = (1 << 4), /* Broken HPA */ ATA_HORKAGE_DISABLE = (1 << 5), /* Disable it */ ATA_HORKAGE_HPA_SIZE = (1 << 6), /* native size off by one */ - 
ATA_HORKAGE_IPM = (1 << 7), /* Link PM problems */ ATA_HORKAGE_IVB = (1 << 8), /* cbl det validity bit bugs */ ATA_HORKAGE_STUCK_ERR = (1 << 9), /* stuck ERR on next PACKET */ ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ @@ -464,6 +464,22 @@ enum ata_completion_errors { AC_ERR_NCQ = (1 << 10), /* marker for offending NCQ qc */ }; +/* + * Link power management policy: If you alter this, you also need to + * alter libata-scsi.c (for the ascii descriptions) + */ +enum ata_lpm_policy { + ATA_LPM_UNKNOWN, + ATA_LPM_MAX_POWER, + ATA_LPM_MED_POWER, + ATA_LPM_MIN_POWER, +}; + +enum ata_lpm_hints { + ATA_LPM_EMPTY = (1 << 0), /* port empty/probing */ + ATA_LPM_HIPM = (1 << 1), /* may use HIPM */ +}; + /* forward declarations */ struct scsi_device; struct ata_port_operations; @@ -478,16 +494,6 @@ typedef int (*ata_reset_fn_t)(struct ata_link *link, unsigned int *classes, unsigned long deadline); typedef void (*ata_postreset_fn_t)(struct ata_link *link, unsigned int *classes); -/* - * host pm policy: If you alter this, you also need to alter libata-scsi.c - * (for the ascii descriptions) - */ -enum link_pm { - NOT_AVAILABLE, - MIN_POWER, - MAX_PERFORMANCE, - MEDIUM_POWER, -}; extern struct device_attribute dev_attr_link_power_management_policy; extern struct device_attribute dev_attr_unload_heads; extern struct device_attribute dev_attr_em_message_type; @@ -530,6 +536,10 @@ struct ata_host { void *private_data; struct ata_port_operations *ops; unsigned long flags; + + struct mutex eh_mutex; + struct task_struct *eh_owner; + #ifdef CONFIG_ATA_ACPI acpi_handle acpi_handle; #endif @@ -560,13 +570,13 @@ struct ata_queued_cmd { unsigned int extrabytes; unsigned int curbytes; - struct scatterlist *cursg; - unsigned int cursg_ofs; - struct scatterlist sgent; struct scatterlist *sg; + struct scatterlist *cursg; + unsigned int cursg_ofs; + unsigned int err_mask; struct ata_taskfile result_tf; ata_qc_cb_t complete_fn; @@ -604,6 +614,7 @@ struct ata_device { union 
acpi_object *gtf_cache; unsigned int gtf_filter; #endif + struct device tdev; /* n_sector is CLEAR_BEGIN, read comment above CLEAR_BEGIN */ u64 n_sectors; /* size of device, if ATA */ u64 n_native_sectors; /* native size, if ATA */ @@ -690,6 +701,7 @@ struct ata_link { struct ata_port *ap; int pmp; /* port multiplier port # */ + struct device tdev; unsigned int active_tag; /* active tag on this link */ u32 sactive; /* active NCQ commands */ @@ -699,6 +711,7 @@ struct ata_link { unsigned int hw_sata_spd_limit; unsigned int sata_spd_limit; unsigned int sata_spd; /* current SATA PHY speed */ + enum ata_lpm_policy lpm_policy; /* record runtime error info, protected by host_set lock */ struct ata_eh_info eh_info; @@ -707,6 +720,8 @@ struct ata_link { struct ata_device device[ATA_MAX_DEVICES]; }; +#define ATA_LINK_CLEAR_BEGIN offsetof(struct ata_link, active_tag) +#define ATA_LINK_CLEAR_END offsetof(struct ata_link, device[0]) struct ata_port { struct Scsi_Host *scsi_host; /* our co-allocated scsi host */ @@ -752,6 +767,7 @@ struct ata_port { struct ata_port_stats stats; struct ata_host *host; struct device *dev; + struct device tdev; struct mutex scsi_scan_mutex; struct delayed_work hotplug_task; @@ -767,7 +783,7 @@ struct ata_port { pm_message_t pm_mesg; int *pm_result; - enum link_pm pm_policy; + enum ata_lpm_policy target_lpm_policy; struct timer_list fastdrain_timer; unsigned long fastdrain_cnt; @@ -833,8 +849,8 @@ struct ata_port_operations { int (*scr_write)(struct ata_link *link, unsigned int sc_reg, u32 val); void (*pmp_attach)(struct ata_port *ap); void (*pmp_detach)(struct ata_port *ap); - int (*enable_pm)(struct ata_port *ap, enum link_pm policy); - void (*disable_pm)(struct ata_port *ap); + int (*set_lpm)(struct ata_link *link, enum ata_lpm_policy policy, + unsigned hints); /* * Start, stop, suspend and resume @@ -946,6 +962,8 @@ extern int sata_link_debounce(struct ata_link *link, const unsigned long *params, unsigned long deadline); extern int 
sata_link_resume(struct ata_link *link, const unsigned long *params, unsigned long deadline); +extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, + bool spm_wakeup); extern int sata_link_hardreset(struct ata_link *link, const unsigned long *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)); @@ -991,8 +1009,9 @@ extern int ata_host_suspend(struct ata_host *host, pm_message_t mesg); extern void ata_host_resume(struct ata_host *host); #endif extern int ata_ratelimit(void); -extern u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val, - unsigned long interval, unsigned long timeout); +extern void ata_msleep(struct ata_port *ap, unsigned int msecs); +extern u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, + u32 val, unsigned long interval, unsigned long timeout); extern int atapi_cmd_type(u8 opcode); extern void ata_tf_to_fis(const struct ata_taskfile *tf, u8 pmp, int is_cmd, u8 *fis); diff --git a/include/linux/list.h b/include/linux/list.h index d167b5d7c0ac..88a000617d77 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -5,7 +5,6 @@ #include <linux/stddef.h> #include <linux/poison.h> #include <linux/prefetch.h> -#include <asm/system.h> /* * Simple doubly linked list implementation. diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 06aed8305bf3..71c09b26c759 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -32,6 +32,17 @@ extern int lock_stat; #define MAX_LOCKDEP_SUBCLASSES 8UL /* + * NR_LOCKDEP_CACHING_CLASSES ... Number of classes + * cached in the instance of lockdep_map + * + * Currently main class (subclass == 0) and signle depth subclass + * are cached in lockdep_map. This optimization is mainly targeting + * on rq->lock. double_rq_lock() acquires this highly competitive with + * single depth. 
+ */ +#define NR_LOCKDEP_CACHING_CLASSES 2 + +/* * Lock-classes are keyed via unique addresses, by embedding the * lockclass-key into the kernel (or module) .data section. (For * static locks we use the lock address itself as the key.) @@ -138,7 +149,7 @@ void clear_lock_stats(struct lock_class *class); */ struct lockdep_map { struct lock_class_key *key; - struct lock_class *class_cache; + struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES]; const char *name; #ifdef CONFIG_LOCK_STAT int cpu; @@ -424,14 +435,6 @@ do { \ #endif /* CONFIG_LOCKDEP */ -#ifdef CONFIG_GENERIC_HARDIRQS -extern void early_init_irq_lock_class(void); -#else -static inline void early_init_irq_lock_class(void) -{ -} -#endif - #ifdef CONFIG_TRACE_IRQFLAGS extern void early_boot_irqs_off(void); extern void early_boot_irqs_on(void); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index a59faf2b5edd..62a10c2a11f2 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -2,6 +2,7 @@ #define _LINUX_MEMBLOCK_H #ifdef __KERNEL__ +#ifdef CONFIG_HAVE_MEMBLOCK /* * Logical memory blocks. 
* @@ -16,73 +17,150 @@ #include <linux/init.h> #include <linux/mm.h> -#define MAX_MEMBLOCK_REGIONS 128 +#include <asm/memblock.h> -struct memblock_property { - u64 base; - u64 size; -}; +#define INIT_MEMBLOCK_REGIONS 128 +#define MEMBLOCK_ERROR 0 struct memblock_region { - unsigned long cnt; - u64 size; - struct memblock_property region[MAX_MEMBLOCK_REGIONS+1]; + phys_addr_t base; + phys_addr_t size; +}; + +struct memblock_type { + unsigned long cnt; /* number of regions */ + unsigned long max; /* size of the allocated array */ + struct memblock_region *regions; }; struct memblock { - unsigned long debug; - u64 rmo_size; - struct memblock_region memory; - struct memblock_region reserved; + phys_addr_t current_limit; + phys_addr_t memory_size; /* Updated by memblock_analyze() */ + struct memblock_type memory; + struct memblock_type reserved; }; extern struct memblock memblock; +extern int memblock_debug; +extern int memblock_can_resize; -extern void __init memblock_init(void); -extern void __init memblock_analyze(void); -extern long memblock_add(u64 base, u64 size); -extern long memblock_remove(u64 base, u64 size); -extern long __init memblock_free(u64 base, u64 size); -extern long __init memblock_reserve(u64 base, u64 size); -extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, - u64 (*nid_range)(u64, u64, int *)); -extern u64 __init memblock_alloc(u64 size, u64 align); -extern u64 __init memblock_alloc_base(u64 size, - u64, u64 max_addr); -extern u64 __init __memblock_alloc_base(u64 size, - u64 align, u64 max_addr); -extern u64 __init memblock_phys_mem_size(void); -extern u64 memblock_end_of_DRAM(void); -extern void __init memblock_enforce_memory_limit(u64 memory_limit); -extern int __init memblock_is_reserved(u64 addr); -extern int memblock_is_region_reserved(u64 base, u64 size); -extern int memblock_find(struct memblock_property *res); +#define memblock_dbg(fmt, ...) 
\ + if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) + +u64 memblock_find_in_range(u64 start, u64 end, u64 size, u64 align); +int memblock_free_reserved_regions(void); +int memblock_reserve_reserved_regions(void); + +extern void memblock_init(void); +extern void memblock_analyze(void); +extern long memblock_add(phys_addr_t base, phys_addr_t size); +extern long memblock_remove(phys_addr_t base, phys_addr_t size); +extern long memblock_free(phys_addr_t base, phys_addr_t size); +extern long memblock_reserve(phys_addr_t base, phys_addr_t size); + +/* The numa aware allocator is only available if + * CONFIG_ARCH_POPULATES_NODE_MAP is set + */ +extern phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, + int nid); +extern phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, + int nid); + +extern phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align); + +/* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ +#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) +#define MEMBLOCK_ALLOC_ACCESSIBLE 0 + +extern phys_addr_t memblock_alloc_base(phys_addr_t size, + phys_addr_t align, + phys_addr_t max_addr); +extern phys_addr_t __memblock_alloc_base(phys_addr_t size, + phys_addr_t align, + phys_addr_t max_addr); +extern phys_addr_t memblock_phys_mem_size(void); +extern phys_addr_t memblock_end_of_DRAM(void); +extern void memblock_enforce_memory_limit(phys_addr_t memory_limit); +extern int memblock_is_memory(phys_addr_t addr); +extern int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); +extern int memblock_is_reserved(phys_addr_t addr); +extern int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); extern void memblock_dump_all(void); -static inline u64 -memblock_size_bytes(struct memblock_region *type, unsigned long region_nr) +/* Provided by the architecture */ +extern phys_addr_t memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid); +extern int 
memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, + phys_addr_t addr2, phys_addr_t size2); + +/** + * memblock_set_current_limit - Set the current allocation limit to allow + * limiting allocations to what is currently + * accessible during boot + * @limit: New limit value (physical address) + */ +extern void memblock_set_current_limit(phys_addr_t limit); + + +/* + * pfn conversion functions + * + * While the memory MEMBLOCKs should always be page aligned, the reserved + * MEMBLOCKs may not be. This accessor attempt to provide a very clear + * idea of what they return for such non aligned MEMBLOCKs. + */ + +/** + * memblock_region_memory_base_pfn - Return the lowest pfn intersecting with the memory region + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_memory_base_pfn(const struct memblock_region *reg) { - return type->region[region_nr].size; + return PFN_UP(reg->base); } -static inline u64 -memblock_size_pages(struct memblock_region *type, unsigned long region_nr) + +/** + * memblock_region_memory_end_pfn - Return the end_pfn this region + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_memory_end_pfn(const struct memblock_region *reg) { - return memblock_size_bytes(type, region_nr) >> PAGE_SHIFT; + return PFN_DOWN(reg->base + reg->size); } -static inline u64 -memblock_start_pfn(struct memblock_region *type, unsigned long region_nr) + +/** + * memblock_region_reserved_base_pfn - Return the lowest pfn intersecting with the reserved region + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_reserved_base_pfn(const struct memblock_region *reg) { - return type->region[region_nr].base >> PAGE_SHIFT; + return PFN_DOWN(reg->base); } -static inline u64 -memblock_end_pfn(struct memblock_region *type, unsigned long region_nr) + +/** + * memblock_region_reserved_end_pfn - Return the end_pfn this region + * @reg: memblock_region structure + */ +static 
inline unsigned long memblock_region_reserved_end_pfn(const struct memblock_region *reg) { - return memblock_start_pfn(type, region_nr) + - memblock_size_pages(type, region_nr); + return PFN_UP(reg->base + reg->size); } -#include <asm/memblock.h> +#define for_each_memblock(memblock_type, region) \ + for (region = memblock.memblock_type.regions; \ + region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \ + region++) + + +#ifdef ARCH_DISCARD_MEMBLOCK +#define __init_memblock __init +#define __initdata_memblock __initdata +#else +#define __init_memblock +#define __initdata_memblock +#endif + +#endif /* CONFIG_HAVE_MEMBLOCK */ #endif /* __KERNEL__ */ diff --git a/include/linux/memory.h b/include/linux/memory.h index 85582e1bcee9..06c1fa0a5c7b 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -23,6 +23,8 @@ struct memory_block { unsigned long phys_index; unsigned long state; + int section_count; + /* * This serializes all state change requests. It isn't * held during creation because the control files are @@ -113,6 +115,8 @@ extern int memory_dev_init(void); extern int remove_memory_block(unsigned long, struct mem_section *, int); extern int memory_notify(unsigned long val, void *v); extern int memory_isolate_notify(unsigned long val, void *v); +extern struct memory_block *find_memory_block_hinted(struct mem_section *, + struct memory_block *); extern struct memory_block *find_memory_block(struct mem_section *); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) enum mem_add_context { BOOT, HOTPLUG }; diff --git a/include/linux/mfd/tc35892.h b/include/linux/mfd/tc35892.h index e47f770d3068..eff3094ca84e 100644 --- a/include/linux/mfd/tc35892.h +++ b/include/linux/mfd/tc35892.h @@ -111,9 +111,13 @@ extern int tc35892_set_bits(struct tc35892 *tc35892, u8 reg, u8 mask, u8 val); * struct tc35892_gpio_platform_data - TC35892 GPIO platform data * @gpio_base: first gpio number assigned to TC35892. 
A maximum of * %TC35892_NR_GPIOS GPIOs will be allocated. + * @setup: callback for board-specific initialization + * @remove: callback for board-specific teardown */ struct tc35892_gpio_platform_data { int gpio_base; + void (*setup)(struct tc35892 *tc35892, unsigned gpio_base); + void (*remove)(struct tc35892 *tc35892, unsigned gpio_base); }; /** diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 0f82293a82ed..78a1b9671752 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -56,6 +56,7 @@ enum { MLX4_CMD_QUERY_HCA = 0xb, MLX4_CMD_QUERY_PORT = 0x43, MLX4_CMD_SENSE_PORT = 0x4d, + MLX4_CMD_HW_HEALTH_CHECK = 0x50, MLX4_CMD_SET_PORT = 0xc, MLX4_CMD_ACCESS_DDR = 0x2e, MLX4_CMD_MAP_ICM = 0xffa, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7a7f9c1e679a..7338654c02b4 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -186,6 +186,10 @@ struct mlx4_caps { int eth_mtu_cap[MLX4_MAX_PORTS + 1]; int gid_table_len[MLX4_MAX_PORTS + 1]; int pkey_table_len[MLX4_MAX_PORTS + 1]; + int trans_type[MLX4_MAX_PORTS + 1]; + int vendor_oui[MLX4_MAX_PORTS + 1]; + int wavelength[MLX4_MAX_PORTS + 1]; + u64 trans_code[MLX4_MAX_PORTS + 1]; int local_ca_ack_delay; int num_uars; int bf_reg_size; @@ -229,6 +233,8 @@ struct mlx4_caps { u32 bmme_flags; u32 reserved_lkey; u16 stat_rate_support; + int udp_rss; + int loopback_support; u8 port_width_cap[MLX4_MAX_PORTS + 1]; int max_gso_sz; int reserved_qps_cnt[MLX4_NUM_QP_REGION]; @@ -480,5 +486,6 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u32 *lkey, u32 *rkey); int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); +int mlx4_test_interrupts(struct mlx4_dev *dev); #endif /* MLX4_DEVICE_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 74949fbef8c6..7687228dd3b7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1175,6 +1175,8 @@ extern void 
free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); int add_from_early_node_map(struct range *range, int az, int nr_range, int nid); +u64 __init find_memory_core_early(int nid, u64 size, u64 align, + u64 goal, u64 limit); void *__alloc_memory_core_early(int nodeid, u64 size, u64 align, u64 goal, u64 limit); typedef int (*work_fn_t)(unsigned long, unsigned long, void *); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ee7e258627f9..cb57d657ce4d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -299,7 +299,7 @@ struct mm_struct { * new_owner->mm == mm * new_owner->alloc_lock is held */ - struct task_struct *owner; + struct task_struct __rcu *owner; #endif #ifdef CONFIG_PROC_FS diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 33b2ea09a4ad..a36ab3bc7b03 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -18,6 +18,7 @@ #define SDIO_CLASS_PHS 0x06 /* PHS standard interface */ #define SDIO_CLASS_WLAN 0x07 /* WLAN interface */ #define SDIO_CLASS_ATA 0x08 /* Embedded SDIO-ATA std interface */ +#define SDIO_CLASS_BT_AMP 0x09 /* Type-A Bluetooth AMP interface */ /* * Vendors and devices. Sort key: vendor first, device next. 
diff --git a/include/linux/module.h b/include/linux/module.h index 8a6b9fdc7ffa..b29e7458b966 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -350,7 +350,10 @@ struct module struct tracepoint *tracepoints; unsigned int num_tracepoints; #endif - +#ifdef HAVE_JUMP_LABEL + struct jump_entry *jump_entries; + unsigned int num_jump_entries; +#endif #ifdef CONFIG_TRACING const char **trace_bprintk_fmt_start; unsigned int num_trace_bprintk_fmt; @@ -686,17 +689,16 @@ extern int module_sysfs_initialized; #ifdef CONFIG_GENERIC_BUG -int module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, +void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, struct module *); void module_bug_cleanup(struct module *); #else /* !CONFIG_GENERIC_BUG */ -static inline int module_bug_finalize(const Elf_Ehdr *hdr, +static inline void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { - return 0; } static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ diff --git a/include/linux/mroute.h b/include/linux/mroute.h index fa04b246c9ae..0fa7a3a874c8 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -213,6 +213,7 @@ struct mfc_cache { unsigned char ttls[MAXVIFS]; /* TTL thresholds */ } res; } mfc_un; + struct rcu_head rcu; }; #define MFC_STATIC 1 diff --git a/include/linux/msi.h b/include/linux/msi.h index 91b05c171854..05acced439a3 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -10,12 +10,13 @@ struct msi_msg { }; /* Helper functions */ -struct irq_desc; -extern void mask_msi_irq(unsigned int irq); -extern void unmask_msi_irq(unsigned int irq); -extern void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); -extern void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); -extern void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); +struct irq_data; +struct msi_desc; +extern void mask_msi_irq(struct irq_data *data); +extern 
void unmask_msi_irq(struct irq_data *data); +extern void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); +extern void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg); +extern void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); extern void read_msi_msg(unsigned int irq, struct msi_msg *msg); extern void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg); extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); diff --git a/include/linux/mtio.h b/include/linux/mtio.h index ef01d6aa5934..8f825756c459 100644 --- a/include/linux/mtio.h +++ b/include/linux/mtio.h @@ -63,6 +63,7 @@ struct mtop { #define MTCOMPRESSION 32/* control compression with SCSI mode page 15 */ #define MTSETPART 33 /* Change the active tape partition */ #define MTMKPART 34 /* Format the tape with one or two partitions */ +#define MTWEOFI 35 /* write an end-of-file record (mark) in immediate mode */ /* structure for MTIOCGET - mag tape get status command */ diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h index 4522aed00906..ef663061d5ac 100644 --- a/include/linux/ncp_fs.h +++ b/include/linux/ncp_fs.h @@ -241,34 +241,6 @@ int ncp_mmap(struct file *, struct vm_area_struct *); /* linux/fs/ncpfs/ncplib_kernel.c */ int ncp_make_closed(struct inode *); -#define ncp_namespace(i) (NCP_SERVER(i)->name_space[NCP_FINFO(i)->volNumber]) - -static inline int ncp_preserve_entry_case(struct inode *i, __u32 nscreator) -{ -#ifdef CONFIG_NCPFS_SMALLDOS - int ns = ncp_namespace(i); - - if ((ns == NW_NS_DOS) -#ifdef CONFIG_NCPFS_OS2_NS - || ((ns == NW_NS_OS2) && (nscreator == NW_NS_DOS)) -#endif /* CONFIG_NCPFS_OS2_NS */ - ) - return 0; -#endif /* CONFIG_NCPFS_SMALLDOS */ - return 1; -} - -#define ncp_preserve_case(i) (ncp_namespace(i) != NW_NS_DOS) - -static inline int ncp_case_sensitive(struct inode *i) -{ -#ifdef CONFIG_NCPFS_NFS_NS - return ncp_namespace(i) == NW_NS_NFS; -#else - return 0; -#endif /* CONFIG_NCPFS_NFS_NS */ -} - #endif /* 
__KERNEL__ */ #endif /* _LINUX_NCP_FS_H */ diff --git a/include/linux/ncp_fs_sb.h b/include/linux/ncp_fs_sb.h index 8da05bc098ca..d64b0e894336 100644 --- a/include/linux/ncp_fs_sb.h +++ b/include/linux/ncp_fs_sb.h @@ -62,6 +62,7 @@ struct ncp_server { int ncp_reply_size; int root_setuped; + struct mutex root_setup_lock; /* info for packet signing */ int sign_wanted; /* 1=Server needs signed packets */ @@ -81,13 +82,14 @@ struct ncp_server { size_t len; void* data; } priv; + struct rw_semaphore auth_rwsem; /* nls info: codepage for volume and charset for I/O */ struct nls_table *nls_vol; struct nls_table *nls_io; /* maximum age in jiffies */ - int dentry_ttl; + atomic_t dentry_ttl; /* miscellaneous */ unsigned int flags; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 46c36ffe20ee..fcd3dda86322 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -228,9 +228,9 @@ struct netdev_hw_addr { #define NETDEV_HW_ADDR_T_SLAVE 3 #define NETDEV_HW_ADDR_T_UNICAST 4 #define NETDEV_HW_ADDR_T_MULTICAST 5 - int refcount; bool synced; bool global_use; + int refcount; struct rcu_head rcu_head; }; @@ -281,6 +281,12 @@ struct hh_cache { unsigned long hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)]; }; +static inline void hh_cache_put(struct hh_cache *hh) +{ + if (atomic_dec_and_test(&hh->hh_refcnt)) + kfree(hh); +} + /* Reserve HH_DATA_MOD byte aligned hard_header_len, but at least that much. * Alternative is: * dev->hard_header_len ? (dev->hard_header_len + @@ -884,6 +890,9 @@ struct net_device { int iflink; struct net_device_stats stats; + atomic_long_t rx_dropped; /* dropped packets by core network + * Do not use this in drivers. + */ #ifdef CONFIG_WIRELESS_EXT /* List of functions to handle Wireless Extensions (instead of ioctl). @@ -901,7 +910,7 @@ struct net_device { unsigned int flags; /* interface flags (a la BSD) */ unsigned short gflags; - unsigned short priv_flags; /* Like 'flags' but invisible to userspace. 
*/ + unsigned int priv_flags; /* Like 'flags' but invisible to userspace. */ unsigned short padded; /* How much padding added by alloc_netdev() */ unsigned char operstate; /* RFC2863 operstate */ @@ -918,10 +927,6 @@ struct net_device { unsigned short needed_headroom; unsigned short needed_tailroom; - struct net_device *master; /* Pointer to master device of a group, - * which this device is member of. - */ - /* Interface address info. */ unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ unsigned char addr_assign_type; /* hw address assignment type */ @@ -937,12 +942,15 @@ struct net_device { /* Protocol specific pointers */ - + +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + struct vlan_group *vlgrp; /* VLAN group */ +#endif #ifdef CONFIG_NET_DSA void *dsa_ptr; /* dsa specific data */ #endif void *atalk_ptr; /* AppleTalk link */ - void *ip_ptr; /* IPv4 specific data */ + struct in_device __rcu *ip_ptr; /* IPv4 specific data */ void *dn_ptr; /* DECnet specific data */ void *ip6_ptr; /* IPv6 specific data */ void *ec_ptr; /* Econet specific data */ @@ -951,9 +959,20 @@ struct net_device { assign before registering */ /* - * Cache line mostly used on receive path (including eth_type_trans()) + * Cache lines mostly used on receive path (including eth_type_trans()) */ - unsigned long last_rx; /* Time of last Rx */ + unsigned long last_rx; /* Time of last Rx + * This should not be set in + * drivers, unless really needed, + * because network stack (bonding) + * use it if/when necessary, to + * avoid dirtying this cache line. + */ + + struct net_device *master; /* Pointer to master device of a group, + * which this device is member of. 
+ */ + /* Interface address info used in eth_type_trans() */ unsigned char *dev_addr; /* hw address, (before bcast because most packets are @@ -969,14 +988,21 @@ struct net_device { struct netdev_rx_queue *_rx; - /* Number of RX queues allocated at alloc_netdev_mq() time */ + /* Number of RX queues allocated at register_netdev() time */ unsigned int num_rx_queues; + + /* Number of RX queues currently active in device */ + unsigned int real_num_rx_queues; #endif - struct netdev_queue rx_queue; rx_handler_func_t *rx_handler; void *rx_handler_data; + struct netdev_queue __rcu *ingress_queue; + +/* + * Cache lines mostly used on transmit path + */ struct netdev_queue *_tx ____cacheline_aligned_in_smp; /* Number of TX queues allocated at alloc_netdev_mq() time */ @@ -990,9 +1016,7 @@ struct net_device { unsigned long tx_queue_len; /* Max frames per queue allowed */ spinlock_t tx_global_lock; -/* - * One part is mostly used on xmit path (device) - */ + /* These may be needed for future network-power-down code. 
*/ /* @@ -1005,7 +1029,7 @@ struct net_device { struct timer_list watchdog_timer; /* Number of references to this device */ - atomic_t refcnt ____cacheline_aligned_in_smp; + int __percpu *pcpu_refcnt; /* delayed register/unregister */ struct list_head todo_list; @@ -1041,8 +1065,12 @@ struct net_device { #endif /* mid-layer private */ - void *ml_priv; - + union { + void *ml_priv; + struct pcpu_lstats __percpu *lstats; /* loopback stats */ + struct pcpu_tstats __percpu *tstats; /* tunnel stats */ + struct pcpu_dstats __percpu *dstats; /* dummy stats */ + }; /* GARP */ struct garp_port *garp_port; @@ -1305,6 +1333,7 @@ static inline void unregister_netdevice(struct net_device *dev) unregister_netdevice_queue(dev, NULL); } +extern int netdev_refcnt_read(const struct net_device *dev); extern void free_netdev(struct net_device *dev); extern void synchronize_net(void); extern int register_netdevice_notifier(struct notifier_block *nb); @@ -1667,11 +1696,34 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) */ static inline int netif_is_multiqueue(const struct net_device *dev) { - return (dev->num_tx_queues > 1); + return dev->num_tx_queues > 1; } -extern void netif_set_real_num_tx_queues(struct net_device *dev, - unsigned int txq); +extern int netif_set_real_num_tx_queues(struct net_device *dev, + unsigned int txq); + +#ifdef CONFIG_RPS +extern int netif_set_real_num_rx_queues(struct net_device *dev, + unsigned int rxq); +#else +static inline int netif_set_real_num_rx_queues(struct net_device *dev, + unsigned int rxq) +{ + return 0; +} +#endif + +static inline int netif_copy_real_num_queues(struct net_device *to_dev, + const struct net_device *from_dev) +{ + netif_set_real_num_tx_queues(to_dev, from_dev->real_num_tx_queues); +#ifdef CONFIG_RPS + return netif_set_real_num_rx_queues(to_dev, + from_dev->real_num_rx_queues); +#else + return 0; +#endif +} /* Use this variant when it is known for sure that it * is executing from hardware 
interrupt context or with hardware interrupts @@ -1695,8 +1747,7 @@ extern gro_result_t dev_gro_receive(struct napi_struct *napi, extern gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb); extern gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); -extern void napi_reuse_skb(struct napi_struct *napi, - struct sk_buff *skb); +extern void napi_gro_flush(struct napi_struct *napi); extern struct sk_buff * napi_get_frags(struct napi_struct *napi); extern gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, @@ -1715,7 +1766,6 @@ extern int netdev_rx_handler_register(struct net_device *dev, void *rx_handler_data); extern void netdev_rx_handler_unregister(struct net_device *dev); -extern void netif_nit_deliver(struct sk_buff *skb); extern int dev_valid_name(const char *name); extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); extern int dev_ethtool(struct net *net, struct ifreq *); @@ -1749,7 +1799,7 @@ extern void netdev_run_todo(void); */ static inline void dev_put(struct net_device *dev) { - atomic_dec(&dev->refcnt); + irqsafe_cpu_dec(*dev->pcpu_refcnt); } /** @@ -1760,7 +1810,7 @@ static inline void dev_put(struct net_device *dev) */ static inline void dev_hold(struct net_device *dev) { - atomic_inc(&dev->refcnt); + irqsafe_cpu_inc(*dev->pcpu_refcnt); } /* Carrier loss detection, dial on demand. 
The functions netif_carrier_on @@ -2171,6 +2221,8 @@ extern void dev_seq_stop(struct seq_file *seq, void *v); extern int netdev_class_create_file(struct class_attribute *class_attr); extern void netdev_class_remove_file(struct class_attribute *class_attr); +extern struct kobj_ns_type_operations net_ns_type_operations; + extern char *netdev_drivername(const struct net_device *dev, char *buffer, int len); extern void linkwatch_run_queue(void); @@ -2191,14 +2243,22 @@ static inline int net_gso_ok(int features, int gso_type) static inline int skb_gso_ok(struct sk_buff *skb, int features) { return net_gso_ok(features, skb_shinfo(skb)->gso_type) && - (!skb_has_frags(skb) || (features & NETIF_F_FRAGLIST)); + (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); } static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) { - return skb_is_gso(skb) && - (!skb_gso_ok(skb, dev->features) || - unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); + if (skb_is_gso(skb)) { + int features = dev->features; + + if (skb->protocol == htons(ETH_P_8021Q) || skb->vlan_tci) + features &= dev->vlan_features; + + return (!skb_gso_ok(skb, features) || + unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); + } + + return 0; } static inline void netif_set_gso_max_size(struct net_device *dev, diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 1afd18c855ec..50cdc2559a5a 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -98,8 +98,14 @@ enum ip_conntrack_events { enum ip_conntrack_expect_events { IPEXP_NEW, /* new expectation */ + IPEXP_DESTROY, /* destroyed expectation */ }; +/* expectation flags */ +#define NF_CT_EXPECT_PERMANENT 0x1 +#define NF_CT_EXPECT_INACTIVE 0x2 +#define NF_CT_EXPECT_USERSPACE 0x4 + #ifdef __KERNEL__ struct ip_conntrack_stat { unsigned int searched; diff --git a/include/linux/netfilter/nf_conntrack_sip.h 
b/include/linux/netfilter/nf_conntrack_sip.h index ff8cfbcf3b81..0ce91d56a5f2 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -89,6 +89,7 @@ enum sip_header_types { SIP_HDR_VIA_TCP, SIP_HDR_EXPIRES, SIP_HDR_CONTENT_LENGTH, + SIP_HDR_CALL_ID, }; enum sdp_header_types { diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 9ed534c991b9..19711e3ffd42 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -39,8 +39,9 @@ enum ctattr_type { CTA_TUPLE_MASTER, CTA_NAT_SEQ_ADJ_ORIG, CTA_NAT_SEQ_ADJ_REPLY, - CTA_SECMARK, + CTA_SECMARK, /* obsolete */ CTA_ZONE, + CTA_SECCTX, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) @@ -161,6 +162,7 @@ enum ctattr_expect { CTA_EXPECT_ID, CTA_EXPECT_HELP_NAME, CTA_EXPECT_ZONE, + CTA_EXPECT_FLAGS, __CTA_EXPECT_MAX }; #define CTA_EXPECT_MAX (__CTA_EXPECT_MAX - 1) @@ -172,4 +174,11 @@ enum ctattr_help { }; #define CTA_HELP_MAX (__CTA_HELP_MAX - 1) +enum ctattr_secctx { + CTA_SECCTX_UNSPEC, + CTA_SECCTX_NAME, + __CTA_SECCTX_MAX +}; +#define CTA_SECCTX_MAX (__CTA_SECCTX_MAX - 1) + #endif /* _IPCONNTRACK_NETLINK_H */ diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 24e5d01d27d0..742bec051440 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -66,6 +66,11 @@ struct xt_standard_target { int verdict; }; +struct xt_error_target { + struct xt_entry_target target; + char errorname[XT_FUNCTION_MAXNAMELEN]; +}; + /* The argument to IPT_SO_GET_REVISION_*. Returns highest revision * kernel supports, if >= revision. 
*/ struct xt_get_revision { diff --git a/include/linux/netfilter/xt_SECMARK.h b/include/linux/netfilter/xt_SECMARK.h index 6fcd3448b186..989092bd6274 100644 --- a/include/linux/netfilter/xt_SECMARK.h +++ b/include/linux/netfilter/xt_SECMARK.h @@ -11,18 +11,12 @@ * packets are being marked for. */ #define SECMARK_MODE_SEL 0x01 /* SELinux */ -#define SECMARK_SELCTX_MAX 256 - -struct xt_secmark_target_selinux_info { - __u32 selsid; - char selctx[SECMARK_SELCTX_MAX]; -}; +#define SECMARK_SECCTX_MAX 256 struct xt_secmark_target_info { __u8 mode; - union { - struct xt_secmark_target_selinux_info sel; - } u; + __u32 secid; + char secctx[SECMARK_SECCTX_MAX]; }; #endif /*_XT_SECMARK_H_target */ diff --git a/include/linux/netfilter/xt_TPROXY.h b/include/linux/netfilter/xt_TPROXY.h index 152e8f97132b..3f3d69361289 100644 --- a/include/linux/netfilter/xt_TPROXY.h +++ b/include/linux/netfilter/xt_TPROXY.h @@ -1,5 +1,5 @@ -#ifndef _XT_TPROXY_H_target -#define _XT_TPROXY_H_target +#ifndef _XT_TPROXY_H +#define _XT_TPROXY_H /* TPROXY target is capable of marking the packet to perform * redirection. 
We can get rid of that whenever we get support for @@ -11,4 +11,11 @@ struct xt_tproxy_target_info { __be16 lport; }; -#endif /* _XT_TPROXY_H_target */ +struct xt_tproxy_target_info_v1 { + u_int32_t mark_mask; + u_int32_t mark_value; + union nf_inet_addr laddr; + __be16 lport; +}; + +#endif /* _XT_TPROXY_H */ diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index e9948c0560f6..adbf4bff87ed 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -21,8 +21,21 @@ #include <linux/netfilter/x_tables.h> +#ifndef __KERNEL__ #define ARPT_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN #define ARPT_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN +#define arpt_entry_target xt_entry_target +#define arpt_standard_target xt_standard_target +#define arpt_error_target xt_error_target +#define ARPT_CONTINUE XT_CONTINUE +#define ARPT_RETURN XT_RETURN +#define arpt_counters_info xt_counters_info +#define arpt_counters xt_counters +#define ARPT_STANDARD_TARGET XT_STANDARD_TARGET +#define ARPT_ERROR_TARGET XT_ERROR_TARGET +#define ARPT_ENTRY_ITERATE(entries, size, fn, args...) \ + XT_ENTRY_ITERATE(struct arpt_entry, entries, size, fn, ## args) +#endif #define ARPT_DEV_ADDR_LEN_MAX 16 @@ -63,9 +76,6 @@ struct arpt_arp { u_int16_t invflags; }; -#define arpt_entry_target xt_entry_target -#define arpt_standard_target xt_standard_target - /* Values for "flag" field in struct arpt_ip (general arp structure). * No flags defined yet. */ @@ -125,16 +135,10 @@ struct arpt_entry #define ARPT_SO_GET_REVISION_TARGET (ARPT_BASE_CTL + 3) #define ARPT_SO_GET_MAX (ARPT_SO_GET_REVISION_TARGET) -/* CONTINUE verdict for targets */ -#define ARPT_CONTINUE XT_CONTINUE - -/* For standard target */ -#define ARPT_RETURN XT_RETURN - /* The argument to ARPT_SO_GET_INFO */ struct arpt_getinfo { /* Which table: caller fills this in. */ - char name[ARPT_TABLE_MAXNAMELEN]; + char name[XT_TABLE_MAXNAMELEN]; /* Kernel fills these in. 
*/ /* Which hook entry points are valid: bitmask */ @@ -156,7 +160,7 @@ struct arpt_getinfo { /* The argument to ARPT_SO_SET_REPLACE. */ struct arpt_replace { /* Which table. */ - char name[ARPT_TABLE_MAXNAMELEN]; + char name[XT_TABLE_MAXNAMELEN]; /* Which hook entry points are valid: bitmask. You can't change this. */ @@ -184,14 +188,10 @@ struct arpt_replace { struct arpt_entry entries[0]; }; -/* The argument to ARPT_SO_ADD_COUNTERS. */ -#define arpt_counters_info xt_counters_info -#define arpt_counters xt_counters - /* The argument to ARPT_SO_GET_ENTRIES. */ struct arpt_get_entries { /* Which table: user fills this in. */ - char name[ARPT_TABLE_MAXNAMELEN]; + char name[XT_TABLE_MAXNAMELEN]; /* User fills this in: total entry size. */ unsigned int size; @@ -200,23 +200,12 @@ struct arpt_get_entries { struct arpt_entry entrytable[0]; }; -/* Standard return verdict, or do jump. */ -#define ARPT_STANDARD_TARGET XT_STANDARD_TARGET -/* Error verdict. */ -#define ARPT_ERROR_TARGET XT_ERROR_TARGET - /* Helper functions */ -static __inline__ struct arpt_entry_target *arpt_get_target(struct arpt_entry *e) +static __inline__ struct xt_entry_target *arpt_get_target(struct arpt_entry *e) { return (void *)e + e->target_offset; } -#ifndef __KERNEL__ -/* fn returns 0 to continue iteration */ -#define ARPT_ENTRY_ITERATE(entries, size, fn, args...) \ - XT_ENTRY_ITERATE(struct arpt_entry, entries, size, fn, ## args) -#endif - /* * Main firewall chains definitions and global var's definitions. */ @@ -225,17 +214,12 @@ static __inline__ struct arpt_entry_target *arpt_get_target(struct arpt_entry *e /* Standard entry. 
*/ struct arpt_standard { struct arpt_entry entry; - struct arpt_standard_target target; -}; - -struct arpt_error_target { - struct arpt_entry_target target; - char errorname[ARPT_FUNCTION_MAXNAMELEN]; + struct xt_standard_target target; }; struct arpt_error { struct arpt_entry entry; - struct arpt_error_target target; + struct xt_error_target target; }; #define ARPT_ENTRY_INIT(__size) \ @@ -247,16 +231,16 @@ struct arpt_error { #define ARPT_STANDARD_INIT(__verdict) \ { \ .entry = ARPT_ENTRY_INIT(sizeof(struct arpt_standard)), \ - .target = XT_TARGET_INIT(ARPT_STANDARD_TARGET, \ - sizeof(struct arpt_standard_target)), \ + .target = XT_TARGET_INIT(XT_STANDARD_TARGET, \ + sizeof(struct xt_standard_target)), \ .target.verdict = -(__verdict) - 1, \ } #define ARPT_ERROR_INIT \ { \ .entry = ARPT_ENTRY_INIT(sizeof(struct arpt_error)), \ - .target = XT_TARGET_INIT(ARPT_ERROR_TARGET, \ - sizeof(struct arpt_error_target)), \ + .target = XT_TARGET_INIT(XT_ERROR_TARGET, \ + sizeof(struct xt_error_target)), \ .target.errorname = "ERROR", \ } @@ -271,8 +255,6 @@ extern unsigned int arpt_do_table(struct sk_buff *skb, const struct net_device *out, struct xt_table *table); -#define ARPT_ALIGN(s) XT_ALIGN(s) - #ifdef CONFIG_COMPAT #include <net/compat.h> @@ -285,14 +267,12 @@ struct compat_arpt_entry { unsigned char elems[0]; }; -static inline struct arpt_entry_target * +static inline struct xt_entry_target * compat_arpt_get_target(struct compat_arpt_entry *e) { return (void *)e + e->target_offset; } -#define COMPAT_ARPT_ALIGN(s) COMPAT_XT_ALIGN(s) - #endif /* CONFIG_COMPAT */ #endif /*__KERNEL__*/ #endif /* _ARPTABLES_H */ diff --git a/include/linux/netfilter_bridge/Kbuild b/include/linux/netfilter_bridge/Kbuild index d4d78672873e..e48f1a3f5a4a 100644 --- a/include/linux/netfilter_bridge/Kbuild +++ b/include/linux/netfilter_bridge/Kbuild @@ -3,11 +3,13 @@ header-y += ebt_among.h header-y += ebt_arp.h header-y += ebt_arpreply.h header-y += ebt_ip.h +header-y += ebt_ip6.h header-y += 
ebt_limit.h header-y += ebt_log.h header-y += ebt_mark_m.h header-y += ebt_mark_t.h header-y += ebt_nat.h +header-y += ebt_nflog.h header-y += ebt_pkttype.h header-y += ebt_redirect.h header-y += ebt_stp.h diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 704a7b6e8169..64a5d95c58e8 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -27,12 +27,49 @@ #include <linux/netfilter/x_tables.h> +#ifndef __KERNEL__ #define IPT_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN #define IPT_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN #define ipt_match xt_match #define ipt_target xt_target #define ipt_table xt_table #define ipt_get_revision xt_get_revision +#define ipt_entry_match xt_entry_match +#define ipt_entry_target xt_entry_target +#define ipt_standard_target xt_standard_target +#define ipt_error_target xt_error_target +#define ipt_counters xt_counters +#define IPT_CONTINUE XT_CONTINUE +#define IPT_RETURN XT_RETURN + +/* This group is older than old (iptables < v1.4.0-rc1~89) */ +#include <linux/netfilter/xt_tcpudp.h> +#define ipt_udp xt_udp +#define ipt_tcp xt_tcp +#define IPT_TCP_INV_SRCPT XT_TCP_INV_SRCPT +#define IPT_TCP_INV_DSTPT XT_TCP_INV_DSTPT +#define IPT_TCP_INV_FLAGS XT_TCP_INV_FLAGS +#define IPT_TCP_INV_OPTION XT_TCP_INV_OPTION +#define IPT_TCP_INV_MASK XT_TCP_INV_MASK +#define IPT_UDP_INV_SRCPT XT_UDP_INV_SRCPT +#define IPT_UDP_INV_DSTPT XT_UDP_INV_DSTPT +#define IPT_UDP_INV_MASK XT_UDP_INV_MASK + +/* The argument to IPT_SO_ADD_COUNTERS. */ +#define ipt_counters_info xt_counters_info +/* Standard return verdict, or do jump. */ +#define IPT_STANDARD_TARGET XT_STANDARD_TARGET +/* Error verdict. */ +#define IPT_ERROR_TARGET XT_ERROR_TARGET + +/* fn returns 0 to continue iteration */ +#define IPT_MATCH_ITERATE(e, fn, args...) 
\ + XT_MATCH_ITERATE(struct ipt_entry, e, fn, ## args) + +/* fn returns 0 to continue iteration */ +#define IPT_ENTRY_ITERATE(entries, size, fn, args...) \ + XT_ENTRY_ITERATE(struct ipt_entry, entries, size, fn, ## args) +#endif /* Yes, Virginia, you have to zero the padding. */ struct ipt_ip { @@ -52,12 +89,6 @@ struct ipt_ip { u_int8_t invflags; }; -#define ipt_entry_match xt_entry_match -#define ipt_entry_target xt_entry_target -#define ipt_standard_target xt_standard_target - -#define ipt_counters xt_counters - /* Values for "flag" field in struct ipt_ip (general ip structure). */ #define IPT_F_FRAG 0x01 /* Set if rule is a fragment rule */ #define IPT_F_GOTO 0x02 /* Set if jump is a goto */ @@ -116,23 +147,6 @@ struct ipt_entry { #define IPT_SO_GET_REVISION_TARGET (IPT_BASE_CTL + 3) #define IPT_SO_GET_MAX IPT_SO_GET_REVISION_TARGET -#define IPT_CONTINUE XT_CONTINUE -#define IPT_RETURN XT_RETURN - -#include <linux/netfilter/xt_tcpudp.h> -#define ipt_udp xt_udp -#define ipt_tcp xt_tcp - -#define IPT_TCP_INV_SRCPT XT_TCP_INV_SRCPT -#define IPT_TCP_INV_DSTPT XT_TCP_INV_DSTPT -#define IPT_TCP_INV_FLAGS XT_TCP_INV_FLAGS -#define IPT_TCP_INV_OPTION XT_TCP_INV_OPTION -#define IPT_TCP_INV_MASK XT_TCP_INV_MASK - -#define IPT_UDP_INV_SRCPT XT_UDP_INV_SRCPT -#define IPT_UDP_INV_DSTPT XT_UDP_INV_DSTPT -#define IPT_UDP_INV_MASK XT_UDP_INV_MASK - /* ICMP matching stuff */ struct ipt_icmp { u_int8_t type; /* type to match */ @@ -146,7 +160,7 @@ struct ipt_icmp { /* The argument to IPT_SO_GET_INFO */ struct ipt_getinfo { /* Which table: caller fills this in. */ - char name[IPT_TABLE_MAXNAMELEN]; + char name[XT_TABLE_MAXNAMELEN]; /* Kernel fills these in. */ /* Which hook entry points are valid: bitmask */ @@ -168,7 +182,7 @@ struct ipt_getinfo { /* The argument to IPT_SO_SET_REPLACE. */ struct ipt_replace { /* Which table. */ - char name[IPT_TABLE_MAXNAMELEN]; + char name[XT_TABLE_MAXNAMELEN]; /* Which hook entry points are valid: bitmask. You can't change this. 
*/ @@ -196,13 +210,10 @@ struct ipt_replace { struct ipt_entry entries[0]; }; -/* The argument to IPT_SO_ADD_COUNTERS. */ -#define ipt_counters_info xt_counters_info - /* The argument to IPT_SO_GET_ENTRIES. */ struct ipt_get_entries { /* Which table: user fills this in. */ - char name[IPT_TABLE_MAXNAMELEN]; + char name[XT_TABLE_MAXNAMELEN]; /* User fills this in: total entry size. */ unsigned int size; @@ -211,28 +222,13 @@ struct ipt_get_entries { struct ipt_entry entrytable[0]; }; -/* Standard return verdict, or do jump. */ -#define IPT_STANDARD_TARGET XT_STANDARD_TARGET -/* Error verdict. */ -#define IPT_ERROR_TARGET XT_ERROR_TARGET - /* Helper functions */ -static __inline__ struct ipt_entry_target * +static __inline__ struct xt_entry_target * ipt_get_target(struct ipt_entry *e) { return (void *)e + e->target_offset; } -#ifndef __KERNEL__ -/* fn returns 0 to continue iteration */ -#define IPT_MATCH_ITERATE(e, fn, args...) \ - XT_MATCH_ITERATE(struct ipt_entry, e, fn, ## args) - -/* fn returns 0 to continue iteration */ -#define IPT_ENTRY_ITERATE(entries, size, fn, args...) \ - XT_ENTRY_ITERATE(struct ipt_entry, entries, size, fn, ## args) -#endif - /* * Main firewall chains definitions and global var's definitions. */ @@ -249,17 +245,12 @@ extern void ipt_unregister_table(struct net *net, struct xt_table *table); /* Standard entry. 
*/ struct ipt_standard { struct ipt_entry entry; - struct ipt_standard_target target; -}; - -struct ipt_error_target { - struct ipt_entry_target target; - char errorname[IPT_FUNCTION_MAXNAMELEN]; + struct xt_standard_target target; }; struct ipt_error { struct ipt_entry entry; - struct ipt_error_target target; + struct xt_error_target target; }; #define IPT_ENTRY_INIT(__size) \ @@ -271,7 +262,7 @@ struct ipt_error { #define IPT_STANDARD_INIT(__verdict) \ { \ .entry = IPT_ENTRY_INIT(sizeof(struct ipt_standard)), \ - .target = XT_TARGET_INIT(IPT_STANDARD_TARGET, \ + .target = XT_TARGET_INIT(XT_STANDARD_TARGET, \ sizeof(struct xt_standard_target)), \ .target.verdict = -(__verdict) - 1, \ } @@ -279,8 +270,8 @@ struct ipt_error { #define IPT_ERROR_INIT \ { \ .entry = IPT_ENTRY_INIT(sizeof(struct ipt_error)), \ - .target = XT_TARGET_INIT(IPT_ERROR_TARGET, \ - sizeof(struct ipt_error_target)), \ + .target = XT_TARGET_INIT(XT_ERROR_TARGET, \ + sizeof(struct xt_error_target)), \ .target.errorname = "ERROR", \ } @@ -291,8 +282,6 @@ extern unsigned int ipt_do_table(struct sk_buff *skb, const struct net_device *out, struct xt_table *table); -#define IPT_ALIGN(s) XT_ALIGN(s) - #ifdef CONFIG_COMPAT #include <net/compat.h> @@ -307,14 +296,12 @@ struct compat_ipt_entry { }; /* Helper functions */ -static inline struct ipt_entry_target * +static inline struct xt_entry_target * compat_ipt_get_target(struct compat_ipt_entry *e) { return (void *)e + e->target_offset; } -#define COMPAT_IPT_ALIGN(s) COMPAT_XT_ALIGN(s) - #endif /* CONFIG_COMPAT */ #endif /*__KERNEL__*/ #endif /* _IPTABLES_H */ diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 18442ff19c07..c9784f7a9c1f 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -27,13 +27,42 @@ #include <linux/netfilter/x_tables.h> +#ifndef __KERNEL__ #define IP6T_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN #define IP6T_TABLE_MAXNAMELEN 
XT_TABLE_MAXNAMELEN - #define ip6t_match xt_match #define ip6t_target xt_target #define ip6t_table xt_table #define ip6t_get_revision xt_get_revision +#define ip6t_entry_match xt_entry_match +#define ip6t_entry_target xt_entry_target +#define ip6t_standard_target xt_standard_target +#define ip6t_error_target xt_error_target +#define ip6t_counters xt_counters +#define IP6T_CONTINUE XT_CONTINUE +#define IP6T_RETURN XT_RETURN + +/* Pre-iptables-1.4.0 */ +#include <linux/netfilter/xt_tcpudp.h> +#define ip6t_tcp xt_tcp +#define ip6t_udp xt_udp +#define IP6T_TCP_INV_SRCPT XT_TCP_INV_SRCPT +#define IP6T_TCP_INV_DSTPT XT_TCP_INV_DSTPT +#define IP6T_TCP_INV_FLAGS XT_TCP_INV_FLAGS +#define IP6T_TCP_INV_OPTION XT_TCP_INV_OPTION +#define IP6T_TCP_INV_MASK XT_TCP_INV_MASK +#define IP6T_UDP_INV_SRCPT XT_UDP_INV_SRCPT +#define IP6T_UDP_INV_DSTPT XT_UDP_INV_DSTPT +#define IP6T_UDP_INV_MASK XT_UDP_INV_MASK + +#define ip6t_counters_info xt_counters_info +#define IP6T_STANDARD_TARGET XT_STANDARD_TARGET +#define IP6T_ERROR_TARGET XT_ERROR_TARGET +#define IP6T_MATCH_ITERATE(e, fn, args...) \ + XT_MATCH_ITERATE(struct ip6t_entry, e, fn, ## args) +#define IP6T_ENTRY_ITERATE(entries, size, fn, args...) \ + XT_ENTRY_ITERATE(struct ip6t_entry, entries, size, fn, ## args) +#endif /* Yes, Virginia, you have to zero the padding. */ struct ip6t_ip6 { @@ -62,12 +91,6 @@ struct ip6t_ip6 { u_int8_t invflags; }; -#define ip6t_entry_match xt_entry_match -#define ip6t_entry_target xt_entry_target -#define ip6t_standard_target xt_standard_target - -#define ip6t_counters xt_counters - /* Values for "flag" field in struct ip6t_ip6 (general ip6 structure). 
*/ #define IP6T_F_PROTO 0x01 /* Set if rule cares about upper protocols */ @@ -112,17 +135,12 @@ struct ip6t_entry { /* Standard entry */ struct ip6t_standard { struct ip6t_entry entry; - struct ip6t_standard_target target; -}; - -struct ip6t_error_target { - struct ip6t_entry_target target; - char errorname[IP6T_FUNCTION_MAXNAMELEN]; + struct xt_standard_target target; }; struct ip6t_error { struct ip6t_entry entry; - struct ip6t_error_target target; + struct xt_error_target target; }; #define IP6T_ENTRY_INIT(__size) \ @@ -134,16 +152,16 @@ struct ip6t_error { #define IP6T_STANDARD_INIT(__verdict) \ { \ .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_standard)), \ - .target = XT_TARGET_INIT(IP6T_STANDARD_TARGET, \ - sizeof(struct ip6t_standard_target)), \ + .target = XT_TARGET_INIT(XT_STANDARD_TARGET, \ + sizeof(struct xt_standard_target)), \ .target.verdict = -(__verdict) - 1, \ } #define IP6T_ERROR_INIT \ { \ .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_error)), \ - .target = XT_TARGET_INIT(IP6T_ERROR_TARGET, \ - sizeof(struct ip6t_error_target)), \ + .target = XT_TARGET_INIT(XT_ERROR_TARGET, \ + sizeof(struct xt_error_target)), \ .target.errorname = "ERROR", \ } @@ -166,30 +184,6 @@ struct ip6t_error { #define IP6T_SO_GET_REVISION_TARGET (IP6T_BASE_CTL + 5) #define IP6T_SO_GET_MAX IP6T_SO_GET_REVISION_TARGET -/* CONTINUE verdict for targets */ -#define IP6T_CONTINUE XT_CONTINUE - -/* For standard target */ -#define IP6T_RETURN XT_RETURN - -/* TCP/UDP matching stuff */ -#include <linux/netfilter/xt_tcpudp.h> - -#define ip6t_tcp xt_tcp -#define ip6t_udp xt_udp - -/* Values for "inv" field in struct ipt_tcp. */ -#define IP6T_TCP_INV_SRCPT XT_TCP_INV_SRCPT -#define IP6T_TCP_INV_DSTPT XT_TCP_INV_DSTPT -#define IP6T_TCP_INV_FLAGS XT_TCP_INV_FLAGS -#define IP6T_TCP_INV_OPTION XT_TCP_INV_OPTION -#define IP6T_TCP_INV_MASK XT_TCP_INV_MASK - -/* Values for "invflags" field in struct ipt_udp. 
*/ -#define IP6T_UDP_INV_SRCPT XT_UDP_INV_SRCPT -#define IP6T_UDP_INV_DSTPT XT_UDP_INV_DSTPT -#define IP6T_UDP_INV_MASK XT_UDP_INV_MASK - /* ICMP matching stuff */ struct ip6t_icmp { u_int8_t type; /* type to match */ @@ -203,7 +197,7 @@ struct ip6t_icmp { /* The argument to IP6T_SO_GET_INFO */ struct ip6t_getinfo { /* Which table: caller fills this in. */ - char name[IP6T_TABLE_MAXNAMELEN]; + char name[XT_TABLE_MAXNAMELEN]; /* Kernel fills these in. */ /* Which hook entry points are valid: bitmask */ @@ -225,7 +219,7 @@ struct ip6t_getinfo { /* The argument to IP6T_SO_SET_REPLACE. */ struct ip6t_replace { /* Which table. */ - char name[IP6T_TABLE_MAXNAMELEN]; + char name[XT_TABLE_MAXNAMELEN]; /* Which hook entry points are valid: bitmask. You can't change this. */ @@ -253,13 +247,10 @@ struct ip6t_replace { struct ip6t_entry entries[0]; }; -/* The argument to IP6T_SO_ADD_COUNTERS. */ -#define ip6t_counters_info xt_counters_info - /* The argument to IP6T_SO_GET_ENTRIES. */ struct ip6t_get_entries { /* Which table: user fills this in. */ - char name[IP6T_TABLE_MAXNAMELEN]; + char name[XT_TABLE_MAXNAMELEN]; /* User fills this in: total entry size. */ unsigned int size; @@ -268,28 +259,13 @@ struct ip6t_get_entries { struct ip6t_entry entrytable[0]; }; -/* Standard return verdict, or do jump. */ -#define IP6T_STANDARD_TARGET XT_STANDARD_TARGET -/* Error verdict. */ -#define IP6T_ERROR_TARGET XT_ERROR_TARGET - /* Helper functions */ -static __inline__ struct ip6t_entry_target * +static __inline__ struct xt_entry_target * ip6t_get_target(struct ip6t_entry *e) { return (void *)e + e->target_offset; } -#ifndef __KERNEL__ -/* fn returns 0 to continue iteration */ -#define IP6T_MATCH_ITERATE(e, fn, args...) \ - XT_MATCH_ITERATE(struct ip6t_entry, e, fn, ## args) - -/* fn returns 0 to continue iteration */ -#define IP6T_ENTRY_ITERATE(entries, size, fn, args...) 
\ - XT_ENTRY_ITERATE(struct ip6t_entry, entries, size, fn, ## args) -#endif - /* * Main firewall chains definitions and global var's definitions. */ @@ -316,8 +292,6 @@ extern int ip6t_ext_hdr(u8 nexthdr); extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, int target, unsigned short *fragoff); -#define IP6T_ALIGN(s) XT_ALIGN(s) - #ifdef CONFIG_COMPAT #include <net/compat.h> @@ -331,14 +305,12 @@ struct compat_ip6t_entry { unsigned char elems[0]; }; -static inline struct ip6t_entry_target * +static inline struct xt_entry_target * compat_ip6t_get_target(struct compat_ip6t_entry *e) { return (void *)e + e->target_offset; } -#define COMPAT_IP6T_ALIGN(s) COMPAT_XT_ALIGN(s) - #endif /* CONFIG_COMPAT */ #endif /*__KERNEL__*/ #endif /* _IP6_TABLES_H */ diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 50d8009be86c..79358bb712c6 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -14,7 +14,6 @@ struct netpoll { struct net_device *dev; - struct net_device *real_dev; char dev_name[IFNAMSIZ]; const char *name; void (*rx_hook)(struct netpoll *, int, char *, int); @@ -53,7 +52,13 @@ void netpoll_set_trap(int trap); void __netpoll_cleanup(struct netpoll *np); void netpoll_cleanup(struct netpoll *np); int __netpoll_rx(struct sk_buff *skb); -void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb); +void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, + struct net_device *dev); +static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) +{ + netpoll_send_skb_on_dev(np, skb, np->dev); +} + #ifdef CONFIG_NETPOLL diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 508f8cf6da37..d0edf7d823ae 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -185,7 +185,7 @@ struct nfs_inode { struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ struct list_head open_states; - struct nfs_delegation *delegation; + struct nfs_delegation __rcu *delegation; fmode_t 
delegation_state; struct rw_semaphore rwsem; #endif /* CONFIG_NFS_V4*/ diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index f5487b6f91ed..227e49dd5720 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -4,16 +4,16 @@ * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * GNU Lesser General Public License for more details. 
* - * You should have received a copy of the GNU General Public License + * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * @@ -147,7 +147,6 @@ struct nilfs_super_root { #define NILFS_MOUNT_ERRORS_CONT 0x0010 /* Continue on errors */ #define NILFS_MOUNT_ERRORS_RO 0x0020 /* Remount fs ro on errors */ #define NILFS_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ -#define NILFS_MOUNT_SNAPSHOT 0x0080 /* Snapshot flag */ #define NILFS_MOUNT_BARRIER 0x1000 /* Use block barriers */ #define NILFS_MOUNT_STRICT_ORDER 0x2000 /* Apply strict in-order semantics also for data */ @@ -229,6 +228,7 @@ struct nilfs_super_block { */ #define NILFS_CURRENT_REV 2 /* current major revision */ #define NILFS_MINOR_REV 0 /* minor revision */ +#define NILFS_MIN_SUPP_REV 2 /* minimum supported revision */ /* * Feature set definitions @@ -270,6 +270,14 @@ struct nilfs_super_block { segments */ /* + * We call DAT, cpfile, and sufile root metadata files. Inodes of + * these files are written in super root block instead of ifile, and + * garbage collector doesn't keep any past versions of these files. + */ +#define NILFS_ROOT_METADATA_FILE(ino) \ + ((ino) >= NILFS_DAT_INO && (ino) <= NILFS_SUFILE_INO) + +/* * bytes offset of secondary super block */ #define NILFS_SB2_OFFSET_BYTES(devsize) ((((devsize) >> 12) - 1) << 12) diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 2c8701687336..0edb2566c14c 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -40,6 +40,43 @@ */ /** + * DOC: Frame transmission/registration support + * + * Frame transmission and registration support exists to allow userspace + * management entities such as wpa_supplicant react to management frames + * that are not being handled by the kernel. 
This includes, for example, + * certain classes of action frames that cannot be handled in the kernel + * for various reasons. + * + * Frame registration is done on a per-interface basis and registrations + * cannot be removed other than by closing the socket. It is possible to + * specify a registration filter to register, for example, only for a + * certain type of action frame. In particular with action frames, those + * that userspace registers for will not be returned as unhandled by the + * driver, so that the registered application has to take responsibility + * for doing that. + * + * The type of frame that can be registered for is also dependent on the + * driver and interface type. The frame types are advertised in wiphy + * attributes so applications know what to expect. + * + * NOTE: When an interface changes type while registrations are active, + * these registrations are ignored until the interface type is + * changed again. This means that changing the interface type can + * lead to a situation that couldn't otherwise be produced, but + * any such registrations will be dormant in the sense that they + * will not be serviced, i.e. they will not receive any frames. + * + * Frame transmission allows userspace to send for example the required + * responses to action frames. It is subject to some sanity checking, + * but many frames can be transmitted. When a frame was transmitted, its + * status is indicated to the sending socket. + * + * For more technical details, see the corresponding command descriptions + * below. + */ + +/** * enum nl80211_commands - supported nl80211 commands * * @NL80211_CMD_UNSPEC: unspecified command to catch errors @@ -258,7 +295,9 @@ * auth and assoc steps. For this, you need to specify the SSID in a * %NL80211_ATTR_SSID attribute, and can optionally specify the association * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_MAC, - * %NL80211_ATTR_WIPHY_FREQ and %NL80211_ATTR_CONTROL_PORT. 
+ * %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT, + * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE and + * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT. * It is also sent as an event, with the BSSID and response IEs when the * connection is established or failed to be established. This can be * determined by the STATUS_CODE attribute. @@ -276,8 +315,8 @@ * channel for the specified amount of time. This can be used to do * off-channel operations like transmit a Public Action frame and wait for * a response while being associated to an AP on another channel. - * %NL80211_ATTR_WIPHY or %NL80211_ATTR_IFINDEX is used to specify which - * radio is used. %NL80211_ATTR_WIPHY_FREQ is used to specify the + * %NL80211_ATTR_IFINDEX is used to specify which interface (and thus + * radio) is used. %NL80211_ATTR_WIPHY_FREQ is used to specify the * frequency for the operation and %NL80211_ATTR_WIPHY_CHANNEL_TYPE may be * optionally used to specify additional channel parameters. * %NL80211_ATTR_DURATION is used to specify the duration in milliseconds @@ -301,16 +340,20 @@ * rate selection. %NL80211_ATTR_IFINDEX is used to specify the interface * and @NL80211_ATTR_TX_RATES the set of allowed rates. * - * @NL80211_CMD_REGISTER_ACTION: Register for receiving certain action frames - * (via @NL80211_CMD_ACTION) for processing in userspace. This command - * requires an interface index and a match attribute containing the first - * few bytes of the frame that should match, e.g. a single byte for only - * a category match or four bytes for vendor frames including the OUI. - * The registration cannot be dropped, but is removed automatically - * when the netlink socket is closed. Multiple registrations can be made. - * @NL80211_CMD_ACTION: Action frame TX request and RX notification. 
This - * command is used both as a request to transmit an Action frame and as an - * event indicating reception of an Action frame that was not processed in + * @NL80211_CMD_REGISTER_FRAME: Register for receiving certain mgmt frames + * (via @NL80211_CMD_FRAME) for processing in userspace. This command + * requires an interface index, a frame type attribute (optional for + * backward compatibility reasons, if not given assumes action frames) + * and a match attribute containing the first few bytes of the frame + * that should match, e.g. a single byte for only a category match or + * four bytes for vendor frames including the OUI. The registration + * cannot be dropped, but is removed automatically when the netlink + * socket is closed. Multiple registrations can be made. + * @NL80211_CMD_REGISTER_ACTION: Alias for @NL80211_CMD_REGISTER_FRAME for + * backward compatibility + * @NL80211_CMD_FRAME: Management frame TX request and RX notification. This + * command is used both as a request to transmit a management frame and + * as an event indicating reception of a frame that was not processed in * kernel code, but is for us (i.e., which may need to be processed in a * user space application). %NL80211_ATTR_FRAME is used to specify the * frame contents (including header). %NL80211_ATTR_WIPHY_FREQ (and @@ -320,11 +363,14 @@ * operational channel). When called, this operation returns a cookie * (%NL80211_ATTR_COOKIE) that will be included with the TX status event * pertaining to the TX request. - * @NL80211_CMD_ACTION_TX_STATUS: Report TX status of an Action frame - * transmitted with %NL80211_CMD_ACTION. %NL80211_ATTR_COOKIE identifies + * @NL80211_CMD_ACTION: Alias for @NL80211_CMD_FRAME for backward compatibility. + * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame + * transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies * the TX command and %NL80211_ATTR_FRAME includes the contents of the * frame. 
%NL80211_ATTR_ACK flag is included if the recipient acknowledged * the frame. + * @NL80211_CMD_ACTION_TX_STATUS: Alias for @NL80211_CMD_FRAME_TX_STATUS for + * backward compatibility. * @NL80211_CMD_SET_CQM: Connection quality monitor configuration. This command * is used to configure connection quality monitoring notification trigger * levels. @@ -341,6 +387,8 @@ * of any other interfaces, and other interfaces will again take * precedence when they are used. * + * @NL80211_CMD_SET_WDS_PEER: Set the MAC address of the peer on a WDS interface. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -429,9 +477,12 @@ enum nl80211_commands { NL80211_CMD_SET_TX_BITRATE_MASK, - NL80211_CMD_REGISTER_ACTION, - NL80211_CMD_ACTION, - NL80211_CMD_ACTION_TX_STATUS, + NL80211_CMD_REGISTER_FRAME, + NL80211_CMD_REGISTER_ACTION = NL80211_CMD_REGISTER_FRAME, + NL80211_CMD_FRAME, + NL80211_CMD_ACTION = NL80211_CMD_FRAME, + NL80211_CMD_FRAME_TX_STATUS, + NL80211_CMD_ACTION_TX_STATUS = NL80211_CMD_FRAME_TX_STATUS, NL80211_CMD_SET_POWER_SAVE, NL80211_CMD_GET_POWER_SAVE, @@ -440,6 +491,7 @@ enum nl80211_commands { NL80211_CMD_NOTIFY_CQM, NL80211_CMD_SET_CHANNEL, + NL80211_CMD_SET_WDS_PEER, /* add new commands above here */ @@ -639,6 +691,15 @@ enum nl80211_commands { * request, the driver will assume that the port is unauthorized until * authorized by user space. Otherwise, port is marked authorized by * default in station mode. + * @NL80211_ATTR_CONTROL_PORT_ETHERTYPE: A 16-bit value indicating the + * ethertype that will be used for key negotiation. It can be + * specified with the associate and connect commands. If it is not + * specified, the value defaults to 0x888E (PAE, 802.1X). This + * attribute is also used as a flag in the wiphy information to + * indicate that protocols other than PAE are supported. 
+ * @NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT: When included along with + * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, indicates that the custom + * ethertype frames used for key negotiation must not be encrypted. * * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver. * We recommend using nested, driver-specific attributes within this. @@ -708,7 +769,16 @@ enum nl80211_commands { * is used with %NL80211_CMD_SET_TX_BITRATE_MASK. * * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain - * at least one byte, currently used with @NL80211_CMD_REGISTER_ACTION. + * at least one byte, currently used with @NL80211_CMD_REGISTER_FRAME. + * @NL80211_ATTR_FRAME_TYPE: A u16 indicating the frame type/subtype for the + * @NL80211_CMD_REGISTER_FRAME command. + * @NL80211_ATTR_TX_FRAME_TYPES: wiphy capability attribute, which is a + * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing + * information about which frame types can be transmitted with + * %NL80211_CMD_FRAME. + * @NL80211_ATTR_RX_FRAME_TYPES: wiphy capability attribute, which is a + * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing + * information about which frame types can be registered for RX. * * @NL80211_ATTR_ACK: Flag attribute indicating that the frame was * acknowledged by the recipient. @@ -731,6 +801,9 @@ enum nl80211_commands { * This is used in association with @NL80211_ATTR_WIPHY_TX_POWER_SETTING * for non-automatic settings. * + * @NL80211_ATTR_SUPPORT_IBSS_RSN: The device supports IBSS RSN, which mostly + * means support for per-station GTKs. 
+ * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -891,6 +964,15 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_TX_POWER_SETTING, NL80211_ATTR_WIPHY_TX_POWER_LEVEL, + NL80211_ATTR_TX_FRAME_TYPES, + NL80211_ATTR_RX_FRAME_TYPES, + NL80211_ATTR_FRAME_TYPE, + + NL80211_ATTR_CONTROL_PORT_ETHERTYPE, + NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT, + + NL80211_ATTR_SUPPORT_IBSS_RSN, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -946,8 +1028,10 @@ enum nl80211_attrs { * @NL80211_IFTYPE_WDS: wireless distribution interface * @NL80211_IFTYPE_MONITOR: monitor interface receiving all frames * @NL80211_IFTYPE_MESH_POINT: mesh point + * @NL80211_IFTYPE_P2P_CLIENT: P2P client + * @NL80211_IFTYPE_P2P_GO: P2P group owner * @NL80211_IFTYPE_MAX: highest interface type number currently defined - * @__NL80211_IFTYPE_AFTER_LAST: internal use + * @NUM_NL80211_IFTYPES: number of defined interface types * * These values are used with the %NL80211_ATTR_IFTYPE * to set the type of an interface. @@ -962,10 +1046,12 @@ enum nl80211_iftype { NL80211_IFTYPE_WDS, NL80211_IFTYPE_MONITOR, NL80211_IFTYPE_MESH_POINT, + NL80211_IFTYPE_P2P_CLIENT, + NL80211_IFTYPE_P2P_GO, /* keep last */ - __NL80211_IFTYPE_AFTER_LAST, - NL80211_IFTYPE_MAX = __NL80211_IFTYPE_AFTER_LAST - 1 + NUM_NL80211_IFTYPES, + NL80211_IFTYPE_MAX = NUM_NL80211_IFTYPES - 1 }; /** @@ -974,11 +1060,14 @@ enum nl80211_iftype { * Station flags. When a station is added to an AP interface, it is * assumed to be already associated (and hence authenticated.) 
* + * @__NL80211_STA_FLAG_INVALID: attribute number 0 is reserved * @NL80211_STA_FLAG_AUTHORIZED: station is authorized (802.1X) * @NL80211_STA_FLAG_SHORT_PREAMBLE: station is capable of receiving frames * with short barker preamble * @NL80211_STA_FLAG_WME: station is WME/QoS capable * @NL80211_STA_FLAG_MFP: station uses management frame protection + * @NL80211_STA_FLAG_MAX: highest station flag number currently defined + * @__NL80211_STA_FLAG_AFTER_LAST: internal use */ enum nl80211_sta_flags { __NL80211_STA_FLAG_INVALID, @@ -1048,6 +1137,8 @@ enum nl80211_rate_info { * @NL80211_STA_INFO_RX_PACKETS: total received packet (u32, from this station) * @NL80211_STA_INFO_TX_PACKETS: total transmitted packets (u32, to this * station) + * @NL80211_STA_INFO_TX_RETRIES: total retries (u32, to this station) + * @NL80211_STA_INFO_TX_FAILED: total failed packets (u32, to this station) */ enum nl80211_sta_info { __NL80211_STA_INFO_INVALID, @@ -1061,6 +1152,8 @@ enum nl80211_sta_info { NL80211_STA_INFO_TX_BITRATE, NL80211_STA_INFO_RX_PACKETS, NL80211_STA_INFO_TX_PACKETS, + NL80211_STA_INFO_TX_RETRIES, + NL80211_STA_INFO_TX_FAILED, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, @@ -1091,14 +1184,17 @@ enum nl80211_mpath_flags { * information about a mesh path. 
* * @__NL80211_MPATH_INFO_INVALID: attribute number 0 is reserved - * @NL80211_ATTR_MPATH_FRAME_QLEN: number of queued frames for this destination - * @NL80211_ATTR_MPATH_SN: destination sequence number - * @NL80211_ATTR_MPATH_METRIC: metric (cost) of this mesh path - * @NL80211_ATTR_MPATH_EXPTIME: expiration time for the path, in msec from now - * @NL80211_ATTR_MPATH_FLAGS: mesh path flags, enumerated in + * @NL80211_MPATH_INFO_FRAME_QLEN: number of queued frames for this destination + * @NL80211_MPATH_INFO_SN: destination sequence number + * @NL80211_MPATH_INFO_METRIC: metric (cost) of this mesh path + * @NL80211_MPATH_INFO_EXPTIME: expiration time for the path, in msec from now + * @NL80211_MPATH_INFO_FLAGS: mesh path flags, enumerated in * &enum nl80211_mpath_flags; - * @NL80211_ATTR_MPATH_DISCOVERY_TIMEOUT: total path discovery timeout, in msec - * @NL80211_ATTR_MPATH_DISCOVERY_RETRIES: mesh path discovery retries + * @NL80211_MPATH_INFO_DISCOVERY_TIMEOUT: total path discovery timeout, in msec + * @NL80211_MPATH_INFO_DISCOVERY_RETRIES: mesh path discovery retries + * @NL80211_MPATH_INFO_MAX: highest mesh path information attribute number + * currently defined + * @__NL80211_MPATH_INFO_AFTER_LAST: internal use */ enum nl80211_mpath_info { __NL80211_MPATH_INFO_INVALID, @@ -1127,6 +1223,8 @@ enum nl80211_mpath_info { * @NL80211_BAND_ATTR_HT_CAPA: HT capabilities, as in the HT information IE * @NL80211_BAND_ATTR_HT_AMPDU_FACTOR: A-MPDU factor, as in 11n * @NL80211_BAND_ATTR_HT_AMPDU_DENSITY: A-MPDU density, as in 11n + * @NL80211_BAND_ATTR_MAX: highest band attribute currently defined + * @__NL80211_BAND_ATTR_AFTER_LAST: internal use */ enum nl80211_band_attr { __NL80211_BAND_ATTR_INVALID, @@ -1147,6 +1245,7 @@ enum nl80211_band_attr { /** * enum nl80211_frequency_attr - frequency attributes + * @__NL80211_FREQUENCY_ATTR_INVALID: attribute number 0 is reserved * @NL80211_FREQUENCY_ATTR_FREQ: Frequency in MHz * @NL80211_FREQUENCY_ATTR_DISABLED: Channel is disabled
in current * regulatory domain. @@ -1158,6 +1257,9 @@ enum nl80211_band_attr { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm * (100 * dBm). + * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number + * currently defined + * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use */ enum nl80211_frequency_attr { __NL80211_FREQUENCY_ATTR_INVALID, @@ -1177,9 +1279,13 @@ enum nl80211_frequency_attr { /** * enum nl80211_bitrate_attr - bitrate attributes + * @__NL80211_BITRATE_ATTR_INVALID: attribute number 0 is reserved * @NL80211_BITRATE_ATTR_RATE: Bitrate in units of 100 kbps * @NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE: Short preamble supported * in 2.4 GHz band. + * @NL80211_BITRATE_ATTR_MAX: highest bitrate attribute number + * currently defined + * @__NL80211_BITRATE_ATTR_AFTER_LAST: internal use */ enum nl80211_bitrate_attr { __NL80211_BITRATE_ATTR_INVALID, @@ -1235,6 +1341,7 @@ enum nl80211_reg_type { /** * enum nl80211_reg_rule_attr - regulatory rule attributes + * @__NL80211_REG_RULE_ATTR_INVALID: attribute number 0 is reserved * @NL80211_ATTR_REG_RULE_FLAGS: a set of flags which specify additional * considerations for a given frequency range. These are the * &enum nl80211_reg_rule_flags. @@ -1251,6 +1358,9 @@ enum nl80211_reg_type { * If you don't have one then don't send this. * @NL80211_ATTR_POWER_RULE_MAX_EIRP: the maximum allowed EIRP for * a given frequency range. The value is in mBm (100 * dBm). 
+ * @NL80211_REG_RULE_ATTR_MAX: highest regulatory rule attribute number + * currently defined + * @__NL80211_REG_RULE_ATTR_AFTER_LAST: internal use */ enum nl80211_reg_rule_attr { __NL80211_REG_RULE_ATTR_INVALID, @@ -1302,11 +1412,31 @@ enum nl80211_reg_rule_flags { * @__NL80211_SURVEY_INFO_INVALID: attribute number 0 is reserved * @NL80211_SURVEY_INFO_FREQUENCY: center frequency of channel * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm) + * @NL80211_SURVEY_INFO_IN_USE: channel is currently being used + * @NL80211_SURVEY_INFO_CHANNEL_TIME: amount of time (in ms) that the radio + * spent on this channel + * @NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY: amount of the time the primary + * channel was sensed busy (either due to activity or energy detect) + * @NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY: amount of time the extension + * channel was sensed busy + * @NL80211_SURVEY_INFO_CHANNEL_TIME_RX: amount of time the radio spent + * receiving data + * @NL80211_SURVEY_INFO_CHANNEL_TIME_TX: amount of time the radio spent + * transmitting data + * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number + * currently defined + * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use */ enum nl80211_survey_info { __NL80211_SURVEY_INFO_INVALID, NL80211_SURVEY_INFO_FREQUENCY, NL80211_SURVEY_INFO_NOISE, + NL80211_SURVEY_INFO_IN_USE, + NL80211_SURVEY_INFO_CHANNEL_TIME, + NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY, + NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY, + NL80211_SURVEY_INFO_CHANNEL_TIME_RX, + NL80211_SURVEY_INFO_CHANNEL_TIME_TX, /* keep last */ __NL80211_SURVEY_INFO_AFTER_LAST, @@ -1466,6 +1596,7 @@ enum nl80211_channel_type { * enum nl80211_bss - netlink attributes for a BSS * * @__NL80211_BSS_INVALID: invalid + * @NL80211_BSS_BSSID: BSSID of the BSS (6 octets) * @NL80211_BSS_FREQUENCY: frequency in MHz (u32) * @NL80211_BSS_TSF: TSF of the received probe response/beacon (u64) * @NL80211_BSS_BEACON_INTERVAL: beacon interval of the (I)BSS (u16) @@ -1509,6 +1640,12 @@ 
enum nl80211_bss { /** * enum nl80211_bss_status - BSS "status" + * @NL80211_BSS_STATUS_AUTHENTICATED: Authenticated with this BSS. + * @NL80211_BSS_STATUS_ASSOCIATED: Associated with this BSS. + * @NL80211_BSS_STATUS_IBSS_JOINED: Joined to this IBSS. + * + * The BSS status is a BSS attribute in scan dumps, which + * indicates the status the interface has wrt. this BSS. */ enum nl80211_bss_status { NL80211_BSS_STATUS_AUTHENTICATED, @@ -1546,11 +1683,14 @@ enum nl80211_auth_type { * @NL80211_KEYTYPE_GROUP: Group (broadcast/multicast) key * @NL80211_KEYTYPE_PAIRWISE: Pairwise (unicast/individual) key * @NL80211_KEYTYPE_PEERKEY: PeerKey (DLS) + * @NUM_NL80211_KEYTYPES: number of defined key types */ enum nl80211_key_type { NL80211_KEYTYPE_GROUP, NL80211_KEYTYPE_PAIRWISE, NL80211_KEYTYPE_PEERKEY, + + NUM_NL80211_KEYTYPES }; /** @@ -1581,6 +1721,9 @@ enum nl80211_wpa_versions { * CCMP keys, each six bytes in little endian * @NL80211_KEY_DEFAULT: flag indicating default key * @NL80211_KEY_DEFAULT_MGMT: flag indicating default management key + * @NL80211_KEY_TYPE: the key type from enum nl80211_key_type, if not + * specified the default depends on whether a MAC address was + * given with the command using the key or not (u32) * @__NL80211_KEY_AFTER_LAST: internal * @NL80211_KEY_MAX: highest key attribute */ @@ -1592,6 +1735,7 @@ enum nl80211_key_attributes { NL80211_KEY_SEQ, NL80211_KEY_DEFAULT, NL80211_KEY_DEFAULT_MGMT, + NL80211_KEY_TYPE, /* keep last */ __NL80211_KEY_AFTER_LAST, @@ -1619,8 +1763,8 @@ enum nl80211_tx_rate_attributes { /** * enum nl80211_band - Frequency band - * @NL80211_BAND_2GHZ - 2.4 GHz ISM band - * @NL80211_BAND_5GHZ - around 5 GHz band (4.9 - 5.7 GHz) + * @NL80211_BAND_2GHZ: 2.4 GHz ISM band + * @NL80211_BAND_5GHZ: around 5 GHz band (4.9 - 5.7 GHz) */ enum nl80211_band { NL80211_BAND_2GHZ, @@ -1658,9 +1802,9 @@ enum nl80211_attr_cqm { /** * enum nl80211_cqm_rssi_threshold_event - RSSI threshold event - * @NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW - The 
RSSI level is lower than the + * @NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW: The RSSI level is lower than the * configured threshold - * @NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH - The RSSI is higher than the + * @NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH: The RSSI is higher than the * configured threshold */ enum nl80211_cqm_rssi_threshold_event { diff --git a/include/linux/notifier.h b/include/linux/notifier.h index b2f1a4d83550..2026f9e1ceb8 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -49,28 +49,28 @@ struct notifier_block { int (*notifier_call)(struct notifier_block *, unsigned long, void *); - struct notifier_block *next; + struct notifier_block __rcu *next; int priority; }; struct atomic_notifier_head { spinlock_t lock; - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct blocking_notifier_head { struct rw_semaphore rwsem; - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct raw_notifier_head { - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct srcu_notifier_head { struct mutex mutex; struct srcu_struct srcu; - struct notifier_block *head; + struct notifier_block __rcu *head; }; #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ diff --git a/include/linux/opp.h b/include/linux/opp.h new file mode 100644 index 000000000000..5449945d589f --- /dev/null +++ b/include/linux/opp.h @@ -0,0 +1,105 @@ +/* + * Generic OPP Interface + * + * Copyright (C) 2009-2010 Texas Instruments Incorporated. + * Nishanth Menon + * Romit Dasgupta + * Kevin Hilman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __LINUX_OPP_H__ +#define __LINUX_OPP_H__ + +#include <linux/err.h> +#include <linux/cpufreq.h> + +struct opp; + +#if defined(CONFIG_PM_OPP) + +unsigned long opp_get_voltage(struct opp *opp); + +unsigned long opp_get_freq(struct opp *opp); + +int opp_get_opp_count(struct device *dev); + +struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq, + bool available); + +struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq); + +struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq); + +int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt); + +int opp_enable(struct device *dev, unsigned long freq); + +int opp_disable(struct device *dev, unsigned long freq); + +#else +static inline unsigned long opp_get_voltage(struct opp *opp) +{ + return 0; +} + +static inline unsigned long opp_get_freq(struct opp *opp) +{ + return 0; +} + +static inline int opp_get_opp_count(struct device *dev) +{ + return 0; +} + +static inline struct opp *opp_find_freq_exact(struct device *dev, + unsigned long freq, bool available) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct opp *opp_find_freq_floor(struct device *dev, + unsigned long *freq) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct opp *opp_find_freq_ceil(struct device *dev, + unsigned long *freq) +{ + return ERR_PTR(-EINVAL); +} + +static inline int opp_add(struct device *dev, unsigned long freq, + unsigned long u_volt) +{ + return -EINVAL; +} + +static inline int opp_enable(struct device *dev, unsigned long freq) +{ + return 0; +} + +static inline int opp_disable(struct device *dev, unsigned long freq) +{ + return 0; +} +#endif /* CONFIG_PM_OPP */ + +#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP) +int opp_init_cpufreq_table(struct device *dev, + struct cpufreq_frequency_table **table); +#else +static inline int opp_init_cpufreq_table(struct device *dev, + struct cpufreq_frequency_table **table) +{ + return -EINVAL; +} +#endif /*
CONFIG_CPU_FREQ */ + +#endif /* __LINUX_OPP_H__ */ diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 5171639ecf0f..32fb81212fd1 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -15,6 +15,7 @@ #include <linux/types.h> #include <linux/spinlock.h> +#include <linux/init.h> #include <asm/atomic.h> /* Each escaped entry is prefixed by ESCAPE_CODE @@ -185,4 +186,10 @@ int oprofile_add_data(struct op_entry *entry, unsigned long val); int oprofile_add_data64(struct op_entry *entry, u64 val); int oprofile_write_commit(struct op_entry *entry); +#ifdef CONFIG_PERF_EVENTS +int __init oprofile_perf_init(struct oprofile_operations *ops); +void oprofile_perf_exit(void); +char *op_name_from_perf_id(void); +#endif /* CONFIG_PERF_EVENTS */ + #endif /* OPROFILE_H */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 570fddeb0388..90c038c0ad96 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -517,6 +517,7 @@ #define PCI_DEVICE_ID_AMD_11H_NB_DRAM 0x1302 #define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303 #define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304 +#define PCI_DEVICE_ID_AMD_15H_NB_MISC 0x1603 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 #define PCI_DEVICE_ID_AMD_SCSI 0x2020 @@ -742,6 +743,7 @@ #define PCI_DEVICE_ID_HP_CISSC 0x3230 #define PCI_DEVICE_ID_HP_CISSD 0x3238 #define PCI_DEVICE_ID_HP_CISSE 0x323a +#define PCI_DEVICE_ID_HP_CISSF 0x323b #define PCI_DEVICE_ID_HP_ZX2_IOC 0x4031 #define PCI_VENDOR_ID_PCTECH 0x1042 @@ -2192,6 +2194,9 @@ #define PCI_VENDOR_ID_ARIMA 0x161f #define PCI_VENDOR_ID_BROCADE 0x1657 +#define PCI_DEVICE_ID_BROCADE_CT 0x0014 +#define PCI_DEVICE_ID_BROCADE_FC_8G1P 0x0017 +#define PCI_DEVICE_ID_BROCADE_CT_FC 0x0021 #define PCI_VENDOR_ID_SIBYTE 0x166d #define PCI_DEVICE_ID_BCM1250_PCI 0x0001 @@ -2315,6 +2320,14 @@ #define PCI_DEVICE_ID_P4080 0x0401 #define PCI_DEVICE_ID_P4040E 0x0408 #define PCI_DEVICE_ID_P4040 0x0409 +#define PCI_DEVICE_ID_P2040E 
0x0410 +#define PCI_DEVICE_ID_P2040 0x0411 +#define PCI_DEVICE_ID_P3041E 0x041E +#define PCI_DEVICE_ID_P3041 0x041F +#define PCI_DEVICE_ID_P5020E 0x0420 +#define PCI_DEVICE_ID_P5020 0x0421 +#define PCI_DEVICE_ID_P5010E 0x0428 +#define PCI_DEVICE_ID_P5010 0x0429 #define PCI_DEVICE_ID_MPC8641 0x7010 #define PCI_DEVICE_ID_MPC8641D 0x7011 #define PCI_DEVICE_ID_MPC8610 0x7018 diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index ce2dc655cd1d..018db9a62ffe 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -139,6 +139,27 @@ __aligned(PAGE_SIZE) /* + * Declaration/definition used for per-CPU variables that must be read mostly. + */ +#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, "..readmostly") + +#define DEFINE_PER_CPU_READ_MOSTLY(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, "..readmostly") + +/* + * Declaration/definition used for large per-CPU variables that must be + * aligned to something larger than the pagesize. + */ +#define DECLARE_PER_CPU_MULTIPAGE_ALIGNED(type, name, size) \ + DECLARE_PER_CPU_SECTION(type, name, "..page_aligned") \ + __aligned(size) + +#define DEFINE_PER_CPU_MULTIPAGE_ALIGNED(type, name, size) \ + DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ + __aligned(size) + +/* * Intermodule exports for per-CPU variables. sparse forgets about * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to * noop if __CHECKER__. 
diff --git a/include/linux/percpu.h b/include/linux/percpu.h index aeeeef1093cd..5095b834a6fb 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -39,6 +39,15 @@ preempt_enable(); \ } while (0) +#define get_cpu_ptr(var) ({ \ + preempt_disable(); \ + this_cpu_ptr(var); }) + +#define put_cpu_ptr(var) do { \ + (void)(var); \ + preempt_enable(); \ +} while (0) + /* minimum unit size, also is the maximum supported allocation size */ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 716f99b682c1..057bf22a8323 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -486,6 +486,8 @@ struct perf_guest_info_callbacks { #include <linux/workqueue.h> #include <linux/ftrace.h> #include <linux/cpu.h> +#include <linux/irq_work.h> +#include <linux/jump_label_ref.h> #include <asm/atomic.h> #include <asm/local.h> @@ -529,16 +531,22 @@ struct hw_perf_event { int last_cpu; }; struct { /* software */ - s64 remaining; struct hrtimer hrtimer; }; #ifdef CONFIG_HAVE_HW_BREAKPOINT struct { /* breakpoint */ struct arch_hw_breakpoint info; struct list_head bp_list; + /* + * Crufty hack to avoid the chicken and egg + * problem hw_breakpoint has with context + * creation and event initialization.
+ */ + struct task_struct *bp_target; }; #endif }; + int state; local64_t prev_count; u64 sample_period; u64 last_period; @@ -550,6 +558,13 @@ struct hw_perf_event { #endif }; +/* + * hw_perf_event::state flags + */ +#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ +#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ +#define PERF_HES_ARCH 0x04 + struct perf_event; /* @@ -561,36 +576,70 @@ struct perf_event; * struct pmu - generic performance monitoring unit */ struct pmu { - int (*enable) (struct perf_event *event); - void (*disable) (struct perf_event *event); - int (*start) (struct perf_event *event); - void (*stop) (struct perf_event *event); - void (*read) (struct perf_event *event); - void (*unthrottle) (struct perf_event *event); + struct list_head entry; + + int * __percpu pmu_disable_count; + struct perf_cpu_context * __percpu pmu_cpu_context; + int task_ctx_nr; + + /* + * Fully disable/enable this PMU, can be used to protect from the PMI + * as well as for lazy/batch writing of the MSRs. + */ + void (*pmu_enable) (struct pmu *pmu); /* optional */ + void (*pmu_disable) (struct pmu *pmu); /* optional */ /* - * Group events scheduling is treated as a transaction, add group - * events as a whole and perform one schedulability test. If the test - * fails, roll back the whole group + * Try and initialize the event for this PMU. + * Should return -ENOENT when the @event doesn't match this PMU. */ + int (*event_init) (struct perf_event *event); + +#define PERF_EF_START 0x01 /* start the counter when adding */ +#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ +#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ /* - * Start the transaction, after this ->enable() doesn't need - * to do schedulability tests. + * Adds/Removes a counter to/from the PMU, can be done inside + * a transaction, see the ->*_txn() methods. 
*/ - void (*start_txn) (const struct pmu *pmu); + int (*add) (struct perf_event *event, int flags); + void (*del) (struct perf_event *event, int flags); + /* - * If ->start_txn() disabled the ->enable() schedulability test + * Starts/Stops a counter present on the PMU. The PMI handler + * should stop the counter when perf_event_overflow() returns + * !0. ->start() will be used to continue. + */ + void (*start) (struct perf_event *event, int flags); + void (*stop) (struct perf_event *event, int flags); + + /* + * Updates the counter value of the event. + */ + void (*read) (struct perf_event *event); + + /* + * Group events scheduling is treated as a transaction, add + * group events as a whole and perform one schedulability test. + * If the test fails, roll back the whole group + * + * Start the transaction, after this ->add() doesn't need to + * do schedulability tests. + */ + void (*start_txn) (struct pmu *pmu); /* optional */ + /* + * If ->start_txn() disabled the ->add() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ - int (*commit_txn) (const struct pmu *pmu); + int (*commit_txn) (struct pmu *pmu); /* optional */ /* - * Will cancel the transaction, assumes ->disable() is called for - * each successfull ->enable() during the transaction. + * Will cancel the transaction, assumes ->del() is called + * for each successful ->add() during the transaction.
*/ - void (*cancel_txn) (const struct pmu *pmu); + void (*cancel_txn) (struct pmu *pmu); /* optional */ }; /** @@ -631,11 +680,6 @@ struct perf_buffer { void *data_pages[0]; }; -struct perf_pending_entry { - struct perf_pending_entry *next; - void (*func)(struct perf_pending_entry *); -}; - struct perf_sample_data; typedef void (*perf_overflow_handler_t)(struct perf_event *, int, @@ -656,6 +700,7 @@ struct swevent_hlist { #define PERF_ATTACH_CONTEXT 0x01 #define PERF_ATTACH_GROUP 0x02 +#define PERF_ATTACH_TASK 0x04 /** * struct perf_event - performance event kernel representation: @@ -669,7 +714,7 @@ struct perf_event { int nr_siblings; int group_flags; struct perf_event *group_leader; - const struct pmu *pmu; + struct pmu *pmu; enum perf_event_active_state state; unsigned int attach_state; @@ -743,7 +788,7 @@ struct perf_event { int pending_wakeup; int pending_kill; int pending_disable; - struct perf_pending_entry pending; + struct irq_work pending; atomic_t event_limit; @@ -763,12 +808,19 @@ struct perf_event { #endif /* CONFIG_PERF_EVENTS */ }; +enum perf_event_context_type { + task_context, + cpu_context, +}; + /** * struct perf_event_context - event context structure * * Used as a container for task events and CPU events as well: */ struct perf_event_context { + enum perf_event_context_type type; + struct pmu *pmu; /* * Protect the states of the events in the list, * nr_active, and the list: @@ -808,6 +860,12 @@ struct perf_event_context { struct rcu_head rcu_head; }; +/* + * Number of contexts where an event can trigger: + * task, softirq, hardirq, nmi. 
+ */ +#define PERF_NR_CONTEXTS 4 + /** * struct perf_event_cpu_context - per cpu event context structure */ @@ -815,18 +873,9 @@ struct perf_cpu_context { struct perf_event_context ctx; struct perf_event_context *task_ctx; int active_oncpu; - int max_pertask; int exclusive; - struct swevent_hlist *swevent_hlist; - struct mutex hlist_mutex; - int hlist_refcount; - - /* - * Recursion avoidance: - * - * task, softirq, irq, nmi context - */ - int recursion[4]; + struct list_head rotation_list; + int jiffies_interval; }; struct perf_output_handle { @@ -842,26 +891,34 @@ struct perf_output_handle { #ifdef CONFIG_PERF_EVENTS -/* - * Set by architecture code: - */ -extern int perf_max_events; +extern int perf_pmu_register(struct pmu *pmu); +extern void perf_pmu_unregister(struct pmu *pmu); + +extern int perf_num_counters(void); +extern const char *perf_pmu_name(void); +extern void __perf_event_task_sched_in(struct task_struct *task); +extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); -extern const struct pmu *hw_perf_event_init(struct perf_event *event); +extern atomic_t perf_task_events; + +static inline void perf_event_task_sched_in(struct task_struct *task) +{ + COND_STMT(&perf_task_events, __perf_event_task_sched_in(task)); +} + +static inline +void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) +{ + COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next)); +} -extern void perf_event_task_sched_in(struct task_struct *task); -extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); -extern void perf_event_task_tick(struct task_struct *task); extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); -extern void set_perf_event_pending(void); -extern void perf_event_do_pending(void); +extern void perf_event_delayed_put(struct 
task_struct *task); extern void perf_event_print_debug(void); -extern void __perf_disable(void); -extern bool __perf_enable(void); -extern void perf_disable(void); -extern void perf_enable(void); +extern void perf_pmu_disable(struct pmu *pmu); +extern void perf_pmu_enable(struct pmu *pmu); extern int perf_event_task_disable(void); extern int perf_event_task_enable(void); extern void perf_event_update_userpage(struct perf_event *event); @@ -869,7 +926,7 @@ extern int perf_event_release_kernel(struct perf_event *event); extern struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, - pid_t pid, + struct task_struct *task, perf_overflow_handler_t callback); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); @@ -920,14 +977,7 @@ extern int perf_event_overflow(struct perf_event *event, int nmi, */ static inline int is_software_event(struct perf_event *event) { - switch (event->attr.type) { - case PERF_TYPE_SOFTWARE: - case PERF_TYPE_TRACEPOINT: - /* for now the breakpoint stuff also works as software event */ - case PERF_TYPE_BREAKPOINT: - return 1; - } - return 0; + return event->pmu->task_ctx_nr == perf_sw_context; } extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; @@ -954,18 +1004,20 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs) perf_arch_fetch_caller_regs(regs, CALLER_ADDR0); } -static inline void +static __always_inline void perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { - if (atomic_read(&perf_swevent_enabled[event_id])) { - struct pt_regs hot_regs; - - if (!regs) { - perf_fetch_caller_regs(&hot_regs); - regs = &hot_regs; - } - __perf_sw_event(event_id, nr, nmi, regs, addr); + struct pt_regs hot_regs; + + JUMP_LABEL(&perf_swevent_enabled[event_id], have_event); + return; + +have_event: + if (!regs) { + perf_fetch_caller_regs(&hot_regs); + regs = &hot_regs; } + __perf_sw_event(event_id, nr, nmi, regs, addr); } extern void 
perf_event_mmap(struct vm_area_struct *vma); @@ -976,7 +1028,21 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks extern void perf_event_comm(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); -extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); +/* Callchains */ +DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); + +extern void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs); +extern void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs); + + +static inline void +perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +{ + if (entry->nr < PERF_MAX_STACK_DEPTH) + entry->ip[entry->nr++] = ip; +} extern int sysctl_perf_event_paranoid; extern int sysctl_perf_event_mlock; @@ -1019,21 +1085,18 @@ extern int perf_swevent_get_recursion_context(void); extern void perf_swevent_put_recursion_context(int rctx); extern void perf_event_enable(struct perf_event *event); extern void perf_event_disable(struct perf_event *event); +extern void perf_event_task_tick(void); #else static inline void perf_event_task_sched_in(struct task_struct *task) { } static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { } -static inline void -perf_event_task_tick(struct task_struct *task) { } static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } -static inline void perf_event_do_pending(void) { } +static inline void perf_event_delayed_put(struct task_struct *task) { } static inline void perf_event_print_debug(void) { } -static inline void perf_disable(void) { } -static inline void perf_enable(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } static inline int perf_event_task_enable(void) { 
return -EINVAL; } @@ -1056,6 +1119,7 @@ static inline int perf_swevent_get_recursion_context(void) { return -1; } static inline void perf_swevent_put_recursion_context(int rctx) { } static inline void perf_event_enable(struct perf_event *event) { } static inline void perf_event_disable(struct perf_event *event) { } +static inline void perf_event_task_tick(void) { } #endif #define perf_output_put(handle, x) \ diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 76edadf046d3..26c8df786918 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -36,6 +36,9 @@ /* Socket options for SOL_PNPIPE level */ #define PNPIPE_ENCAP 1 #define PNPIPE_IFINDEX 2 +#define PNPIPE_PIPE_HANDLE 3 +#define PNPIPE_ENABLE 4 +/* unused slot */ #define PNADDR_ANY 0 #define PNADDR_BROADCAST 0xFC @@ -47,6 +50,8 @@ /* ioctls */ #define SIOCPNGETOBJECT (SIOCPROTOPRIVATE + 0) +#define SIOCPNADDRESOURCE (SIOCPROTOPRIVATE + 14) +#define SIOCPNDELRESOURCE (SIOCPROTOPRIVATE + 15) /* Phonet protocol header */ struct phonethdr { diff --git a/include/linux/phy.h b/include/linux/phy.h index 6b0a782c6224..a6e047a04f79 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -116,7 +116,7 @@ struct mii_bus { /* list of all PHYs on bus */ struct phy_device *phy_map[PHY_MAX_ADDR]; - /* Phy addresses to be ignored when probing */ + /* PHY addresses to be ignored when probing */ u32 phy_mask; /* @@ -283,7 +283,7 @@ struct phy_device { phy_interface_t interface; - /* Bus address of the PHY (0-32) */ + /* Bus address of the PHY (0-31) */ int addr; /* diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index 7f6ba8658abe..defbde203d07 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -332,6 +332,7 @@ enum { FLOW_KEY_SKUID, FLOW_KEY_SKGID, FLOW_KEY_VLAN_TAG, + FLOW_KEY_RXHASH, __FLOW_KEY_MAX, }; diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index d7ecad0093bb..2e700ec0601f 100644 --- a/include/linux/platform_device.h +++ 
b/include/linux/platform_device.h @@ -138,6 +138,9 @@ extern struct platform_device *platform_create_bundle(struct platform_driver *dr struct resource *res, unsigned int n_res, const void *data, size_t size); +extern const struct dev_pm_ops * platform_bus_get_pm_ops(void); +extern void platform_bus_set_pm_ops(const struct dev_pm_ops *pm); + /* early platform driver interface */ struct early_platform_driver { const char *class_str; diff --git a/include/linux/pm.h b/include/linux/pm.h index 52e8c55ff314..40f3f45702ba 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -41,6 +41,12 @@ extern void (*pm_power_off_prepare)(void); struct device; +#ifdef CONFIG_PM +extern const char power_group_name[]; /* = "power" */ +#else +#define power_group_name NULL +#endif + typedef struct pm_message { int event; } pm_message_t; @@ -438,6 +444,9 @@ enum rpm_status { * * RPM_REQ_SUSPEND Run the device bus type's ->runtime_suspend() callback * + * RPM_REQ_AUTOSUSPEND Same as RPM_REQ_SUSPEND, but not until the device has + * been inactive for as long as power.autosuspend_delay + * * RPM_REQ_RESUME Run the device bus type's ->runtime_resume() callback */ @@ -445,26 +454,28 @@ enum rpm_request { RPM_REQ_NONE = 0, RPM_REQ_IDLE, RPM_REQ_SUSPEND, + RPM_REQ_AUTOSUSPEND, RPM_REQ_RESUME, }; +struct wakeup_source; + struct dev_pm_info { pm_message_t power_state; unsigned int can_wakeup:1; - unsigned int should_wakeup:1; unsigned async_suspend:1; enum dpm_state status; /* Owned by the PM core */ + spinlock_t lock; #ifdef CONFIG_PM_SLEEP struct list_head entry; struct completion completion; - unsigned long wakeup_count; + struct wakeup_source *wakeup; #endif #ifdef CONFIG_PM_RUNTIME struct timer_list suspend_timer; unsigned long timer_expires; struct work_struct work; wait_queue_head_t wait_queue; - spinlock_t lock; atomic_t usage_count; atomic_t child_count; unsigned int disable_depth:3; @@ -474,9 +485,14 @@ struct dev_pm_info { unsigned int deferred_resume:1; unsigned int run_wake:1; 
unsigned int runtime_auto:1; + unsigned int no_callbacks:1; + unsigned int use_autosuspend:1; + unsigned int timer_autosuspends:1; enum rpm_request request; enum rpm_status runtime_status; int runtime_error; + int autosuspend_delay; + unsigned long last_busy; unsigned long active_jiffies; unsigned long suspended_jiffies; unsigned long accounting_timestamp; @@ -558,12 +574,7 @@ extern void __suspend_report_result(const char *function, void *fn, int ret); __suspend_report_result(__func__, fn, ret); \ } while (0) -extern void device_pm_wait_for_dev(struct device *sub, struct device *dev); - -/* drivers/base/power/wakeup.c */ -extern void pm_wakeup_event(struct device *dev, unsigned int msec); -extern void pm_stay_awake(struct device *dev); -extern void pm_relax(void); +extern int device_pm_wait_for_dev(struct device *sub, struct device *dev); #else /* !CONFIG_PM_SLEEP */ #define device_pm_lock() do {} while (0) @@ -576,11 +587,10 @@ static inline int dpm_suspend_start(pm_message_t state) #define suspend_report_result(fn, ret) do {} while (0) -static inline void device_pm_wait_for_dev(struct device *a, struct device *b) {} - -static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {} -static inline void pm_stay_awake(struct device *dev) {} -static inline void pm_relax(void) {} +static inline int device_pm_wait_for_dev(struct device *a, struct device *b) +{ + return 0; +} #endif /* !CONFIG_PM_SLEEP */ /* How to reorder dpm_list after device_move() */ diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 6e81888c6222..3ec2358f8692 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -12,18 +12,24 @@ #include <linux/device.h> #include <linux/pm.h> +#include <linux/jiffies.h> + +/* Runtime PM flag argument bits */ +#define RPM_ASYNC 0x01 /* Request is asynchronous */ +#define RPM_NOWAIT 0x02 /* Don't wait for concurrent + state change */ +#define RPM_GET_PUT 0x04 /* Increment/decrement the + usage_count */ 
+#define RPM_AUTO 0x08 /* Use autosuspend_delay */ + #ifdef CONFIG_PM_RUNTIME extern struct workqueue_struct *pm_wq; -extern int pm_runtime_idle(struct device *dev); -extern int pm_runtime_suspend(struct device *dev); -extern int pm_runtime_resume(struct device *dev); -extern int pm_request_idle(struct device *dev); +extern int __pm_runtime_idle(struct device *dev, int rpmflags); +extern int __pm_runtime_suspend(struct device *dev, int rpmflags); +extern int __pm_runtime_resume(struct device *dev, int rpmflags); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); -extern int pm_request_resume(struct device *dev); -extern int __pm_runtime_get(struct device *dev, bool sync); -extern int __pm_runtime_put(struct device *dev, bool sync); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); extern void pm_runtime_enable(struct device *dev); @@ -33,6 +39,10 @@ extern void pm_runtime_forbid(struct device *dev); extern int pm_generic_runtime_idle(struct device *dev); extern int pm_generic_runtime_suspend(struct device *dev); extern int pm_generic_runtime_resume(struct device *dev); +extern void pm_runtime_no_callbacks(struct device *dev); +extern void __pm_runtime_use_autosuspend(struct device *dev, bool use); +extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); +extern unsigned long pm_runtime_autosuspend_expiration(struct device *dev); static inline bool pm_children_suspended(struct device *dev) { @@ -70,19 +80,29 @@ static inline bool pm_runtime_suspended(struct device *dev) return dev->power.runtime_status == RPM_SUSPENDED; } +static inline void pm_runtime_mark_last_busy(struct device *dev) +{ + ACCESS_ONCE(dev->power.last_busy) = jiffies; +} + #else /* !CONFIG_PM_RUNTIME */ -static inline int pm_runtime_idle(struct device *dev) { return -ENOSYS; } -static inline int pm_runtime_suspend(struct device *dev) { return -ENOSYS; } -static inline int 
pm_runtime_resume(struct device *dev) { return 0; } -static inline int pm_request_idle(struct device *dev) { return -ENOSYS; } +static inline int __pm_runtime_idle(struct device *dev, int rpmflags) +{ + return -ENOSYS; +} +static inline int __pm_runtime_suspend(struct device *dev, int rpmflags) +{ + return -ENOSYS; +} +static inline int __pm_runtime_resume(struct device *dev, int rpmflags) +{ + return 1; +} static inline int pm_schedule_suspend(struct device *dev, unsigned int delay) { return -ENOSYS; } -static inline int pm_request_resume(struct device *dev) { return 0; } -static inline int __pm_runtime_get(struct device *dev, bool sync) { return 1; } -static inline int __pm_runtime_put(struct device *dev, bool sync) { return 0; } static inline int __pm_runtime_set_status(struct device *dev, unsigned int status) { return 0; } static inline int pm_runtime_barrier(struct device *dev) { return 0; } @@ -102,27 +122,82 @@ static inline bool pm_runtime_suspended(struct device *dev) { return false; } static inline int pm_generic_runtime_idle(struct device *dev) { return 0; } static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; } static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } +static inline void pm_runtime_no_callbacks(struct device *dev) {} + +static inline void pm_runtime_mark_last_busy(struct device *dev) {} +static inline void __pm_runtime_use_autosuspend(struct device *dev, + bool use) {} +static inline void pm_runtime_set_autosuspend_delay(struct device *dev, + int delay) {} +static inline unsigned long pm_runtime_autosuspend_expiration( + struct device *dev) { return 0; } #endif /* !CONFIG_PM_RUNTIME */ +static inline int pm_runtime_idle(struct device *dev) +{ + return __pm_runtime_idle(dev, 0); +} + +static inline int pm_runtime_suspend(struct device *dev) +{ + return __pm_runtime_suspend(dev, 0); +} + +static inline int pm_runtime_autosuspend(struct device *dev) +{ + return __pm_runtime_suspend(dev, 
RPM_AUTO); +} + +static inline int pm_runtime_resume(struct device *dev) +{ + return __pm_runtime_resume(dev, 0); +} + +static inline int pm_request_idle(struct device *dev) +{ + return __pm_runtime_idle(dev, RPM_ASYNC); +} + +static inline int pm_request_resume(struct device *dev) +{ + return __pm_runtime_resume(dev, RPM_ASYNC); +} + +static inline int pm_request_autosuspend(struct device *dev) +{ + return __pm_runtime_suspend(dev, RPM_ASYNC | RPM_AUTO); +} + static inline int pm_runtime_get(struct device *dev) { - return __pm_runtime_get(dev, false); + return __pm_runtime_resume(dev, RPM_GET_PUT | RPM_ASYNC); } static inline int pm_runtime_get_sync(struct device *dev) { - return __pm_runtime_get(dev, true); + return __pm_runtime_resume(dev, RPM_GET_PUT); } static inline int pm_runtime_put(struct device *dev) { - return __pm_runtime_put(dev, false); + return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); +} + +static inline int pm_runtime_put_autosuspend(struct device *dev) +{ + return __pm_runtime_suspend(dev, + RPM_GET_PUT | RPM_ASYNC | RPM_AUTO); } static inline int pm_runtime_put_sync(struct device *dev) { - return __pm_runtime_put(dev, true); + return __pm_runtime_idle(dev, RPM_GET_PUT); +} + +static inline int pm_runtime_put_sync_autosuspend(struct device *dev) +{ + return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_AUTO); } static inline int pm_runtime_set_active(struct device *dev) @@ -140,4 +215,14 @@ static inline void pm_runtime_disable(struct device *dev) __pm_runtime_disable(dev, true); } +static inline void pm_runtime_use_autosuspend(struct device *dev) +{ + __pm_runtime_use_autosuspend(dev, true); +} + +static inline void pm_runtime_dont_use_autosuspend(struct device *dev) +{ + __pm_runtime_use_autosuspend(dev, false); +} + #endif diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 76aca48722ae..9cff00dd6b63 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -2,6 +2,7 @@ * pm_wakeup.h - Power management 
wakeup interface * * Copyright (C) 2008 Alan Stern + * Copyright (C) 2010 Rafael J. Wysocki, Novell Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -27,19 +28,77 @@ #include <linux/types.h> -#ifdef CONFIG_PM - -/* Changes to device_may_wakeup take effect on the next pm state change. +/** + * struct wakeup_source - Representation of wakeup sources * - * By default, most devices should leave wakeup disabled. The exceptions - * are devices that everyone expects to be wakeup sources: keyboards, - * power buttons, possibly network interfaces, etc. + * @total_time: Total time this wakeup source has been active. + * @max_time: Maximum time this wakeup source has been continuously active. + * @last_time: Monotonic clock when the wakeup source was activated last time. + * @event_count: Number of signaled wakeup events. + * @active_count: Number of times the wakeup source was activated. + * @relax_count: Number of times the wakeup source was deactivated. + * @hit_count: Number of times the wakeup source might abort system suspend. + * @active: Status of the wakeup source. */ -static inline void device_init_wakeup(struct device *dev, bool val) +struct wakeup_source { + char *name; + struct list_head entry; + spinlock_t lock; + struct timer_list timer; + unsigned long timer_expires; + ktime_t total_time; + ktime_t max_time; + ktime_t last_time; + unsigned long event_count; + unsigned long active_count; + unsigned long relax_count; + unsigned long hit_count; + unsigned int active:1; +}; + +#ifdef CONFIG_PM_SLEEP + +/* + * Changes to device_may_wakeup take effect on the next pm state change. 
+ */ + +static inline void device_set_wakeup_capable(struct device *dev, bool capable) +{ + dev->power.can_wakeup = capable; +} + +static inline bool device_can_wakeup(struct device *dev) +{ + return dev->power.can_wakeup; +} + + + +static inline bool device_may_wakeup(struct device *dev) { - dev->power.can_wakeup = dev->power.should_wakeup = val; + return dev->power.can_wakeup && !!dev->power.wakeup; } +/* drivers/base/power/wakeup.c */ +extern struct wakeup_source *wakeup_source_create(const char *name); +extern void wakeup_source_destroy(struct wakeup_source *ws); +extern void wakeup_source_add(struct wakeup_source *ws); +extern void wakeup_source_remove(struct wakeup_source *ws); +extern struct wakeup_source *wakeup_source_register(const char *name); +extern void wakeup_source_unregister(struct wakeup_source *ws); +extern int device_wakeup_enable(struct device *dev); +extern int device_wakeup_disable(struct device *dev); +extern int device_init_wakeup(struct device *dev, bool val); +extern int device_set_wakeup_enable(struct device *dev, bool enable); +extern void __pm_stay_awake(struct wakeup_source *ws); +extern void pm_stay_awake(struct device *dev); +extern void __pm_relax(struct wakeup_source *ws); +extern void pm_relax(struct device *dev); +extern void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec); +extern void pm_wakeup_event(struct device *dev, unsigned int msec); + +#else /* !CONFIG_PM_SLEEP */ + static inline void device_set_wakeup_capable(struct device *dev, bool capable) { dev->power.can_wakeup = capable; @@ -50,43 +109,63 @@ static inline bool device_can_wakeup(struct device *dev) return dev->power.can_wakeup; } -static inline void device_set_wakeup_enable(struct device *dev, bool enable) +static inline bool device_may_wakeup(struct device *dev) { - dev->power.should_wakeup = enable; + return false; } -static inline bool device_may_wakeup(struct device *dev) +static inline struct wakeup_source *wakeup_source_create(const char 
*name) { - return dev->power.can_wakeup && dev->power.should_wakeup; + return NULL; } -#else /* !CONFIG_PM */ +static inline void wakeup_source_destroy(struct wakeup_source *ws) {} + +static inline void wakeup_source_add(struct wakeup_source *ws) {} -/* For some reason the following routines work even without CONFIG_PM */ -static inline void device_init_wakeup(struct device *dev, bool val) +static inline void wakeup_source_remove(struct wakeup_source *ws) {} + +static inline struct wakeup_source *wakeup_source_register(const char *name) { - dev->power.can_wakeup = val; + return NULL; } -static inline void device_set_wakeup_capable(struct device *dev, bool capable) +static inline void wakeup_source_unregister(struct wakeup_source *ws) {} + +static inline int device_wakeup_enable(struct device *dev) { - dev->power.can_wakeup = capable; + return -EINVAL; } -static inline bool device_can_wakeup(struct device *dev) +static inline int device_wakeup_disable(struct device *dev) { - return dev->power.can_wakeup; + return 0; } -static inline void device_set_wakeup_enable(struct device *dev, bool enable) +static inline int device_init_wakeup(struct device *dev, bool val) { + dev->power.can_wakeup = val; + return val ? 
-EINVAL : 0; } -static inline bool device_may_wakeup(struct device *dev) + +static inline int device_set_wakeup_enable(struct device *dev, bool enable) { - return false; + return -EINVAL; } -#endif /* !CONFIG_PM */ +static inline void __pm_stay_awake(struct wakeup_source *ws) {} + +static inline void pm_stay_awake(struct device *dev) {} + +static inline void __pm_relax(struct wakeup_source *ws) {} + +static inline void pm_relax(struct device *dev) {} + +static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) {} + +static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {} + +#endif /* !CONFIG_PM_SLEEP */ #endif /* _LINUX_PM_WAKEUP_H */ diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 634b8e674ac5..a39cbed9ee17 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -47,6 +47,8 @@ static inline void *radix_tree_indirect_to_ptr(void *ptr) { return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); } +#define radix_tree_indirect_to_ptr(ptr) \ + radix_tree_indirect_to_ptr((void __force *)(ptr)) static inline int radix_tree_is_indirect_ptr(void *ptr) { @@ -61,7 +63,7 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) struct radix_tree_root { unsigned int height; gfp_t gfp_mask; - struct radix_tree_node *rnode; + struct radix_tree_node __rcu *rnode; }; #define RADIX_TREE_INIT(mask) { \ diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 4ec3b38ce9c5..f31ef61f1c65 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -10,6 +10,21 @@ #include <linux/rcupdate.h> /* + * Why is there no list_empty_rcu()? Because list_empty() serves this + * purpose. The list_empty() function fetches the RCU-protected pointer + * and compares it to the address of the list head, but neither dereferences + * this pointer itself nor provides this pointer to the caller. 
Therefore, + * it is not necessary to use rcu_dereference(), so that list_empty() can + * be used anywhere you would want to use a list_empty_rcu(). + */ + +/* + * return the ->next pointer of a list_head in an rcu safe + * way, we must not access it directly + */ +#define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) + +/* * Insert a new entry between two known consecutive entries. * * This is only for internal list manipulation where we know @@ -20,7 +35,7 @@ static inline void __list_add_rcu(struct list_head *new, { new->next = next; new->prev = prev; - rcu_assign_pointer(prev->next, new); + rcu_assign_pointer(list_next_rcu(prev), new); next->prev = new; } @@ -138,7 +153,7 @@ static inline void list_replace_rcu(struct list_head *old, { new->next = old->next; new->prev = old->prev; - rcu_assign_pointer(new->prev->next, new); + rcu_assign_pointer(list_next_rcu(new->prev), new); new->next->prev = new; old->prev = LIST_POISON2; } @@ -193,7 +208,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ last->next = at; - rcu_assign_pointer(head->next, first); + rcu_assign_pointer(list_next_rcu(head), first); first->prev = head; at->prev = last; } @@ -208,7 +223,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). 
*/ #define list_entry_rcu(ptr, type, member) \ - container_of(rcu_dereference_raw(ptr), type, member) + ({typeof (*ptr) __rcu *__ptr = (typeof (*ptr) __rcu __force *)ptr; \ + container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ + }) /** * list_first_entry_rcu - get the first element from a list @@ -225,9 +242,9 @@ static inline void list_splice_init_rcu(struct list_head *list, list_entry_rcu((ptr)->next, type, member) #define __list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference_raw((head)->next); \ + for (pos = rcu_dereference_raw(list_next_rcu(head)); \ pos != (head); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(list_next_rcu((pos))) /** * list_for_each_entry_rcu - iterate over rcu list of given type @@ -257,9 +274,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_continue_rcu(pos, head) \ - for ((pos) = rcu_dereference_raw((pos)->next); \ + for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \ prefetch((pos)->next), (pos) != (head); \ - (pos) = rcu_dereference_raw((pos)->next)) + (pos) = rcu_dereference_raw(list_next_rcu(pos))) /** * list_for_each_entry_continue_rcu - continue iteration over list of given type @@ -314,12 +331,19 @@ static inline void hlist_replace_rcu(struct hlist_node *old, new->next = next; new->pprev = old->pprev; - rcu_assign_pointer(*new->pprev, new); + rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new); if (next) new->next->pprev = &new->next; old->pprev = LIST_POISON2; } +/* + * return the first or the next element in an RCU protected hlist + */ +#define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first))) +#define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next))) +#define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev))) + /** * hlist_add_head_rcu * @n: the element to add to the hash list. 
@@ -346,7 +370,7 @@ static inline void hlist_add_head_rcu(struct hlist_node *n, n->next = first; n->pprev = &h->first; - rcu_assign_pointer(h->first, n); + rcu_assign_pointer(hlist_first_rcu(h), n); if (first) first->pprev = &n->next; } @@ -374,7 +398,7 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, { n->pprev = next->pprev; n->next = next; - rcu_assign_pointer(*(n->pprev), n); + rcu_assign_pointer(hlist_pprev_rcu(n), n); next->pprev = &n->next; } @@ -401,15 +425,15 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, { n->next = prev->next; n->pprev = &prev->next; - rcu_assign_pointer(prev->next, n); + rcu_assign_pointer(hlist_next_rcu(prev), n); if (n->next) n->next->pprev = &n->next; } -#define __hlist_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->first); \ - pos && ({ prefetch(pos->next); 1; }); \ - pos = rcu_dereference(pos->next)) +#define __hlist_for_each_rcu(pos, head) \ + for (pos = rcu_dereference(hlist_first_rcu(head)); \ + pos && ({ prefetch(pos->next); 1; }); \ + pos = rcu_dereference(hlist_next_rcu(pos))) /** * hlist_for_each_entry_rcu - iterate over rcu list of given type @@ -422,11 +446,11 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). 
*/ -#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw((head)->first); \ +#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ pos && ({ prefetch(pos->next); 1; }) && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(hlist_next_rcu(pos))) /** * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index b70ffe53cb9f..2ae13714828b 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -37,6 +37,12 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) } } +#define hlist_nulls_first_rcu(head) \ + (*((struct hlist_nulls_node __rcu __force **)&(head)->first)) + +#define hlist_nulls_next_rcu(node) \ + (*((struct hlist_nulls_node __rcu __force **)&(node)->next)) + /** * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization * @n: the element to delete from the hash list. @@ -88,7 +94,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, n->next = first; n->pprev = &h->first; - rcu_assign_pointer(h->first, n); + rcu_assign_pointer(hlist_nulls_first_rcu(h), n); if (!is_a_nulls(first)) first->pprev = &n->next; } @@ -100,11 +106,11 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, * @member: the name of the hlist_nulls_node within the struct. 
* */ -#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw((head)->first); \ - (!is_a_nulls(pos)) && \ +#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ + (!is_a_nulls(pos)) && \ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) #endif #endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9fbc54a2585d..03cda7bed985 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -41,11 +41,15 @@ #include <linux/lockdep.h> #include <linux/completion.h> #include <linux/debugobjects.h> +#include <linux/compiler.h> #ifdef CONFIG_RCU_TORTURE_TEST extern int rcutorture_runnable; /* for sysctl */ #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ +#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) +#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) + /** * struct rcu_head - callback structure for use with RCU * @next: next update requests in a list @@ -57,29 +61,94 @@ struct rcu_head { }; /* Exported common interfaces */ -extern void rcu_barrier(void); +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)); +extern void synchronize_sched(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); extern void synchronize_sched_expedited(void); extern int sched_expedited_torture_stats(char *page); +static inline void __rcu_read_lock_bh(void) +{ + local_bh_disable(); +} + +static inline void __rcu_read_unlock_bh(void) +{ + local_bh_enable(); +} + +#ifdef CONFIG_PREEMPT_RCU + +extern void __rcu_read_lock(void); +extern void __rcu_read_unlock(void); +void synchronize_rcu(void); + +/* + * Defined as a macro as it is a very low level header included from + * areas that don't even know about current. 
This gives the rcu_read_lock() + * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other + * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. + */ +#define rcu_preempt_depth() (current->rcu_read_lock_nesting) + +#else /* #ifdef CONFIG_PREEMPT_RCU */ + +static inline void __rcu_read_lock(void) +{ + preempt_disable(); +} + +static inline void __rcu_read_unlock(void) +{ + preempt_enable(); +} + +static inline void synchronize_rcu(void) +{ + synchronize_sched(); +} + +static inline int rcu_preempt_depth(void) +{ + return 0; +} + +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + /* Internal to kernel */ extern void rcu_init(void); +extern void rcu_sched_qs(int cpu); +extern void rcu_bh_qs(int cpu); +extern void rcu_check_callbacks(int cpu, int user); +struct notifier_block; + +#ifdef CONFIG_NO_HZ + +extern void rcu_enter_nohz(void); +extern void rcu_exit_nohz(void); + +#else /* #ifdef CONFIG_NO_HZ */ + +static inline void rcu_enter_nohz(void) +{ +} + +static inline void rcu_exit_nohz(void) +{ +} + +#endif /* #else #ifdef CONFIG_NO_HZ */ #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include <linux/rcutree.h> -#elif defined(CONFIG_TINY_RCU) +#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) #include <linux/rcutiny.h> #else #error "Unknown RCU implementation specified to kernel configuration" #endif -#define RCU_HEAD_INIT { .next = NULL, .func = NULL } -#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT -#define INIT_RCU_HEAD(ptr) do { \ - (ptr)->next = NULL; (ptr)->func = NULL; \ -} while (0) - /* * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic * initialization and destruction of rcu_head on the stack. rcu_head structures @@ -120,14 +189,15 @@ extern struct lockdep_map rcu_sched_lock_map; extern int debug_lockdep_rcu_enabled(void); /** - * rcu_read_lock_held - might we be in RCU read-side critical section? 
+ * rcu_read_lock_held() - might we be in RCU read-side critical section? * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, * this assumes we are in an RCU read-side critical section unless it can - * prove otherwise. + * prove otherwise. This is useful for debug checks in functions that + * require that they be called within an RCU read-side critical section. * - * Check debug_lockdep_rcu_enabled() to prevent false positives during boot + * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot * and while lockdep is disabled. */ static inline int rcu_read_lock_held(void) @@ -144,14 +214,16 @@ static inline int rcu_read_lock_held(void) extern int rcu_read_lock_bh_held(void); /** - * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section? + * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an * RCU-sched read-side critical section. In absence of * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side * critical section unless it can prove otherwise. Note that disabling * of preemption (including disabling irqs) counts as an RCU-sched - * read-side critical section. + * read-side critical section. This is useful for debug checks in functions + * that require that they be called within an RCU-sched read-side + * critical section. * * Check debug_lockdep_rcu_enabled() to prevent false positives during boot * and while lockdep is disabled. 
@@ -211,7 +283,11 @@ static inline int rcu_read_lock_sched_held(void) extern int rcu_my_thread_group_empty(void); -#define __do_rcu_dereference_check(c) \ +/** + * rcu_lockdep_assert - emit lockdep splat if specified condition not met + * @c: condition to check + */ +#define rcu_lockdep_assert(c) \ do { \ static bool __warned; \ if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ @@ -220,41 +296,163 @@ extern int rcu_my_thread_group_empty(void); } \ } while (0) +#else /* #ifdef CONFIG_PROVE_RCU */ + +#define rcu_lockdep_assert(c) do { } while (0) + +#endif /* #else #ifdef CONFIG_PROVE_RCU */ + +/* + * Helper functions for rcu_dereference_check(), rcu_dereference_protected() + * and rcu_assign_pointer(). Some of these could be folded into their + * callers, but they are left separate in order to ease introduction of + * multiple flavors of pointers to match the multiple flavors of RCU + * (e.g., __rcu_bh, * __rcu_sched, and __srcu), should this make sense in + * the future. + */ + +#ifdef __CHECKER__ +#define rcu_dereference_sparse(p, space) \ + ((void)(((typeof(*p) space *)p) == p)) +#else /* #ifdef __CHECKER__ */ +#define rcu_dereference_sparse(p, space) +#endif /* #else #ifdef __CHECKER__ */ + +#define __rcu_access_pointer(p, space) \ + ({ \ + typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ + rcu_dereference_sparse(p, space); \ + ((typeof(*p) __force __kernel *)(_________p1)); \ + }) +#define __rcu_dereference_check(p, c, space) \ + ({ \ + typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ + rcu_lockdep_assert(c); \ + rcu_dereference_sparse(p, space); \ + smp_read_barrier_depends(); \ + ((typeof(*p) __force __kernel *)(_________p1)); \ + }) +#define __rcu_dereference_protected(p, c, space) \ + ({ \ + rcu_lockdep_assert(c); \ + rcu_dereference_sparse(p, space); \ + ((typeof(*p) __force __kernel *)(p)); \ + }) + +#define __rcu_dereference_index_check(p, c) \ + ({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + 
rcu_lockdep_assert(c); \ + smp_read_barrier_depends(); \ + (_________p1); \ + }) +#define __rcu_assign_pointer(p, v, space) \ + ({ \ + if (!__builtin_constant_p(v) || \ + ((v) != NULL)) \ + smp_wmb(); \ + (p) = (typeof(*v) __force space *)(v); \ + }) + + +/** + * rcu_access_pointer() - fetch RCU pointer with no dereferencing + * @p: The pointer to read + * + * Return the value of the specified RCU-protected pointer, but omit the + * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful + * when the value of this pointer is accessed, but the pointer is not + * dereferenced, for example, when testing an RCU-protected pointer against + * NULL. Although rcu_access_pointer() may also be used in cases where + * update-side locks prevent the value of the pointer from changing, you + * should instead use rcu_dereference_protected() for this use case. + */ +#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) + /** - * rcu_dereference_check - rcu_dereference with debug checking + * rcu_dereference_check() - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * Do an rcu_dereference(), but check that the conditions under which the - * dereference will take place are correct. Typically the conditions indicate - * the various locking conditions that should be held at that point. The check - * should return true if the conditions are satisfied. + * dereference will take place are correct. Typically the conditions + * indicate the various locking conditions that should be held at that + * point. The check should return true if the conditions are satisfied. + * An implicit check for being in an RCU read-side critical section + * (rcu_read_lock()) is included. 
* * For example: * - * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || - * lockdep_is_held(&foo->lock)); + * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock)); * * could be used to indicate to lockdep that foo->bar may only be dereferenced - * if either the RCU read lock is held, or that the lock required to replace + * if either rcu_read_lock() is held, or that the lock required to replace * the bar struct at foo->bar is held. * * Note that the list of conditions may also include indications of when a lock * need not be held, for example during initialisation or destruction of the * target struct: * - * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || - * lockdep_is_held(&foo->lock) || + * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) || * atomic_read(&foo->usage) == 0); + * + * Inserts memory barriers on architectures that require them + * (currently only the Alpha), prevents the compiler from refetching + * (and from merging fetches), and, more importantly, documents exactly + * which pointers are protected by RCU and checks that the pointer is + * annotated as __rcu. */ #define rcu_dereference_check(p, c) \ - ({ \ - __do_rcu_dereference_check(c); \ - rcu_dereference_raw(p); \ - }) + __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu) + +/** + * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-bh counterpart to rcu_dereference_check(). 
+ */ +#define rcu_dereference_bh_check(p, c) \ + __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu) /** - * rcu_dereference_protected - fetch RCU pointer when updates prevented + * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-sched counterpart to rcu_dereference_check(). + */ +#define rcu_dereference_sched_check(p, c) \ + __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \ + __rcu) + +#define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/ + +/** + * rcu_dereference_index_check() - rcu_dereference for indices with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * Similar to rcu_dereference_check(), but omits the sparse checking. + * This allows rcu_dereference_index_check() to be used on integers, + * which can then be used as array indices. Attempting to use + * rcu_dereference_check() on an integer will give compiler warnings + * because the sparse address-space mechanism relies on dereferencing + * the RCU-protected pointer. Dereferencing integers is not something + * that even gcc will put up with. + * + * Note that this function does not implicitly check for RCU read-side + * critical sections. If this function gains lots of uses, it might + * make sense to provide versions for each flavor of RCU, but it does + * not make sense as of early 2010. 
+ */ +#define rcu_dereference_index_check(p, c) \ + __rcu_dereference_index_check((p), (c)) + +/** + * rcu_dereference_protected() - fetch RCU pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place * * Return the value of the specified RCU-protected pointer, but omit * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This @@ -263,35 +461,61 @@ extern int rcu_my_thread_group_empty(void); * prevent the compiler from repeating this reference or combining it * with other references, so it should not be used without protection * of appropriate locks. + * + * This function is only for update-side use. Using this function + * when protected only by rcu_read_lock() will result in infrequent + * but very ugly failures. */ #define rcu_dereference_protected(p, c) \ - ({ \ - __do_rcu_dereference_check(c); \ - (p); \ - }) + __rcu_dereference_protected((p), (c), __rcu) -#else /* #ifdef CONFIG_PROVE_RCU */ +/** + * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-bh counterpart to rcu_dereference_protected(). + */ +#define rcu_dereference_bh_protected(p, c) \ + __rcu_dereference_protected((p), (c), __rcu) -#define rcu_dereference_check(p, c) rcu_dereference_raw(p) -#define rcu_dereference_protected(p, c) (p) +/** + * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-sched counterpart to rcu_dereference_protected(). 
+ */ +#define rcu_dereference_sched_protected(p, c) \ + __rcu_dereference_protected((p), (c), __rcu) -#endif /* #else #ifdef CONFIG_PROVE_RCU */ /** - * rcu_access_pointer - fetch RCU pointer with no dereferencing + * rcu_dereference() - fetch RCU-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing * - * Return the value of the specified RCU-protected pointer, but omit the - * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful - * when the value of this pointer is accessed, but the pointer is not - * dereferenced, for example, when testing an RCU-protected pointer against - * NULL. This may also be used in cases where update-side locks prevent - * the value of the pointer from changing, but rcu_dereference_protected() - * is a lighter-weight primitive for this use case. + * This is a simple wrapper around rcu_dereference_check(). + */ +#define rcu_dereference(p) rcu_dereference_check(p, 0) + +/** + * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing + * + * Makes rcu_dereference_check() do the dirty work. + */ +#define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0) + +/** + * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing + * + * Makes rcu_dereference_check() do the dirty work. */ -#define rcu_access_pointer(p) ACCESS_ONCE(p) +#define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) /** - * rcu_read_lock - mark the beginning of an RCU read-side critical section. + * rcu_read_lock() - mark the beginning of an RCU read-side critical section * * When synchronize_rcu() is invoked on one CPU while other CPUs * are within RCU read-side critical sections, then the @@ -302,7 +526,7 @@ extern int rcu_my_thread_group_empty(void); * until after the all the other CPUs exit their critical sections. 
* * Note, however, that RCU callbacks are permitted to run concurrently - * with RCU read-side critical sections. One way that this can happen + * with new RCU read-side critical sections. One way that this can happen * is via the following sequence of events: (1) CPU 0 enters an RCU * read-side critical section, (2) CPU 1 invokes call_rcu() to register * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, @@ -317,7 +541,20 @@ extern int rcu_my_thread_group_empty(void); * will be deferred until the outermost RCU read-side critical section * completes. * - * It is illegal to block while in an RCU read-side critical section. + * You can avoid reading and understanding the next paragraph by + * following this rule: don't put anything in an rcu_read_lock() RCU + * read-side critical section that would block in a !PREEMPT kernel. + * But if you want the full story, read on! + * + * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it + * is illegal to block while in an RCU read-side critical section. In + * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU) + * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may + * be preempted, but explicit blocking is illegal. Finally, in preemptible + * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds, + * RCU read-side critical sections may be preempted and they may also + * block, but only when acquiring spinlocks that are subject to priority + * inheritance. */ static inline void rcu_read_lock(void) { @@ -337,7 +574,7 @@ static inline void rcu_read_lock(void) */ /** - * rcu_read_unlock - marks the end of an RCU read-side critical section. + * rcu_read_unlock() - marks the end of an RCU read-side critical section. * * See rcu_read_lock() for more information. 
*/ @@ -349,15 +586,16 @@ static inline void rcu_read_unlock(void) } /** - * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section + * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section * * This is equivalent of rcu_read_lock(), but to be used when updates - * are being done using call_rcu_bh(). Since call_rcu_bh() callbacks - * consider completion of a softirq handler to be a quiescent state, - * a process in RCU read-side critical section must be protected by - * disabling softirqs. Read-side critical sections in interrupt context - * can use just rcu_read_lock(). - * + * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since + * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a + * softirq handler to be a quiescent state, a process in RCU read-side + * critical section must be protected by disabling softirqs. Read-side + * critical sections in interrupt context can use just rcu_read_lock(), + * though this should at least be commented to avoid confusing people + * reading the code. */ static inline void rcu_read_lock_bh(void) { @@ -379,13 +617,12 @@ static inline void rcu_read_unlock_bh(void) } /** - * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section + * rcu_read_lock_sched() - mark the beginning of an RCU-sched critical section * - * Should be used with either - * - synchronize_sched() - * or - * - call_rcu_sched() and rcu_barrier_sched() - * on the write-side to insure proper synchronization. + * This is equivalent of rcu_read_lock(), but to be used when updates + * are being done using call_rcu_sched() or synchronize_sched(). + * Read-side critical sections can also be introduced by anything that + * disables preemption, including local_irq_disable() and friends. 
*/ static inline void rcu_read_lock_sched(void) { @@ -420,54 +657,14 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) preempt_enable_notrace(); } - /** - * rcu_dereference_raw - fetch an RCU-protected pointer + * rcu_assign_pointer() - assign to RCU-protected pointer + * @p: pointer to assign to + * @v: value to assign (publish) * - * The caller must be within some flavor of RCU read-side critical - * section, or must be otherwise preventing the pointer from changing, - * for example, by holding an appropriate lock. This pointer may later - * be safely dereferenced. It is the caller's responsibility to have - * done the right thing, as this primitive does no checking of any kind. - * - * Inserts memory barriers on architectures that require them - * (currently only the Alpha), and, more importantly, documents - * exactly which pointers are protected by RCU. - */ -#define rcu_dereference_raw(p) ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - smp_read_barrier_depends(); \ - (_________p1); \ - }) - -/** - * rcu_dereference - fetch an RCU-protected pointer, checking for RCU - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference(p) \ - rcu_dereference_check(p, rcu_read_lock_held()) - -/** - * rcu_dereference_bh - fetch an RCU-protected pointer, checking for RCU-bh - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference_bh(p) \ - rcu_dereference_check(p, rcu_read_lock_bh_held()) - -/** - * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference_sched(p) \ - rcu_dereference_check(p, rcu_read_lock_sched_held()) - -/** - * rcu_assign_pointer - assign (publicize) a pointer to a newly - * initialized structure that will be dereferenced by RCU read-side - * critical sections. Returns the value assigned. 
+ * Assigns the specified value to the specified RCU-protected + * pointer, ensuring that any concurrent RCU readers will see + * any prior initialization. Returns the value assigned. * * Inserts memory barriers on architectures that require them * (pretty much all of them other than x86), and also prevents @@ -476,14 +673,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * call documents which pointers will be dereferenced by RCU read-side * code. */ - #define rcu_assign_pointer(p, v) \ - ({ \ - if (!__builtin_constant_p(v) || \ - ((v) != NULL)) \ - smp_wmb(); \ - (p) = (v); \ - }) + __rcu_assign_pointer((p), (v), __rcu) + +/** + * RCU_INIT_POINTER() - initialize an RCU protected pointer + * + * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep + * splats. + */ +#define RCU_INIT_POINTER(p, v) \ + p = (typeof(*v) __force __rcu *)(v) /* Infrastructure to implement the synchronize_() primitives. */ @@ -494,26 +694,37 @@ struct rcu_synchronize { extern void wakeme_after_rcu(struct rcu_head *head); +#ifdef CONFIG_PREEMPT_RCU + /** - * call_rcu - Queue an RCU callback for invocation after a grace period. + * call_rcu() - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period + * @func: actual callback function to be invoked after the grace period * - * The update function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. RCU read-side critical + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all pre-existing RCU read-side + * critical sections have completed. However, the callback function + * might well execute concurrently with RCU read-side critical sections + * that started after call_rcu() was invoked. 
RCU read-side critical * sections are delimited by rcu_read_lock() and rcu_read_unlock(), * and may be nested. */ extern void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)); +#else /* #ifdef CONFIG_PREEMPT_RCU */ + +/* In classic RCU, call_rcu() is just call_rcu_sched(). */ +#define call_rcu call_rcu_sched + +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + /** - * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. + * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period + * @func: actual callback function to be invoked after the grace period * - * The update function will be invoked some time after a full grace + * The callback function will be invoked some time after a full grace * period elapses, in other words after all currently executing RCU * read-side critical sections have completed. call_rcu_bh() assumes * that the read-side critical sections end on completion of a softirq @@ -566,37 +777,4 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ -#ifndef CONFIG_PROVE_RCU -#define __do_rcu_dereference_check(c) do { } while (0) -#endif /* #ifdef CONFIG_PROVE_RCU */ - -#define __rcu_dereference_index_check(p, c) \ - ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - __do_rcu_dereference_check(c); \ - smp_read_barrier_depends(); \ - (_________p1); \ - }) - -/** - * rcu_dereference_index_check() - rcu_dereference for indices with debug checking - * @p: The pointer to read, prior to dereferencing - * @c: The conditions under which the dereference will take place - * - * Similar to rcu_dereference_check(), but omits the sparse checking. - * This allows rcu_dereference_index_check() to be used on integers, - * which can then be used as array indices. 
Attempting to use - * rcu_dereference_check() on an integer will give compiler warnings - * because the sparse address-space mechanism relies on dereferencing - * the RCU-protected pointer. Dereferencing integers is not something - * that even gcc will put up with. - * - * Note that this function does not implicitly check for RCU read-side - * critical sections. If this function gains lots of uses, it might - * make sense to provide versions for each flavor of RCU, but it does - * not make sense as of early 2010. - */ -#define rcu_dereference_index_check(p, c) \ - __rcu_dereference_index_check((p), (c)) - #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index e2e893144a84..13877cb93a60 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -27,103 +27,101 @@ #include <linux/cache.h> -void rcu_sched_qs(int cpu); -void rcu_bh_qs(int cpu); -static inline void rcu_note_context_switch(int cpu) -{ - rcu_sched_qs(cpu); -} +#define rcu_init_sched() do { } while (0) -#define __rcu_read_lock() preempt_disable() -#define __rcu_read_unlock() preempt_enable() -#define __rcu_read_lock_bh() local_bh_disable() -#define __rcu_read_unlock_bh() local_bh_enable() -#define call_rcu_sched call_rcu +#ifdef CONFIG_TINY_RCU -#define rcu_init_sched() do { } while (0) -extern void rcu_check_callbacks(int cpu, int user); +static inline void synchronize_rcu_expedited(void) +{ + synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */ +} -static inline int rcu_needs_cpu(int cpu) +static inline void rcu_barrier(void) { - return 0; + rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */ } -/* - * Return the number of grace periods. 
- */ -static inline long rcu_batches_completed(void) +#else /* #ifdef CONFIG_TINY_RCU */ + +void rcu_barrier(void); +void synchronize_rcu_expedited(void); + +#endif /* #else #ifdef CONFIG_TINY_RCU */ + +static inline void synchronize_rcu_bh(void) { - return 0; + synchronize_sched(); } -/* - * Return the number of bottom-half grace periods. - */ -static inline long rcu_batches_completed_bh(void) +static inline void synchronize_rcu_bh_expedited(void) { - return 0; + synchronize_sched(); } -static inline void rcu_force_quiescent_state(void) +#ifdef CONFIG_TINY_RCU + +static inline void rcu_preempt_note_context_switch(void) { } -static inline void rcu_bh_force_quiescent_state(void) +static inline void exit_rcu(void) { } -static inline void rcu_sched_force_quiescent_state(void) +static inline int rcu_needs_cpu(int cpu) { + return 0; } -extern void synchronize_sched(void); +#else /* #ifdef CONFIG_TINY_RCU */ + +void rcu_preempt_note_context_switch(void); +extern void exit_rcu(void); +int rcu_preempt_needs_cpu(void); -static inline void synchronize_rcu(void) +static inline int rcu_needs_cpu(int cpu) { - synchronize_sched(); + return rcu_preempt_needs_cpu(); } -static inline void synchronize_rcu_bh(void) +#endif /* #else #ifdef CONFIG_TINY_RCU */ + +static inline void rcu_note_context_switch(int cpu) { - synchronize_sched(); + rcu_sched_qs(cpu); + rcu_preempt_note_context_switch(); } -static inline void synchronize_rcu_expedited(void) +/* + * Return the number of grace periods. + */ +static inline long rcu_batches_completed(void) { - synchronize_sched(); + return 0; } -static inline void synchronize_rcu_bh_expedited(void) +/* + * Return the number of bottom-half grace periods. 
+ */ +static inline long rcu_batches_completed_bh(void) { - synchronize_sched(); + return 0; } -struct notifier_block; - -#ifdef CONFIG_NO_HZ - -extern void rcu_enter_nohz(void); -extern void rcu_exit_nohz(void); - -#else /* #ifdef CONFIG_NO_HZ */ - -static inline void rcu_enter_nohz(void) +static inline void rcu_force_quiescent_state(void) { } -static inline void rcu_exit_nohz(void) +static inline void rcu_bh_force_quiescent_state(void) { } -#endif /* #else #ifdef CONFIG_NO_HZ */ - -static inline void exit_rcu(void) +static inline void rcu_sched_force_quiescent_state(void) { } -static inline int rcu_preempt_depth(void) +static inline void rcu_cpu_stall_reset(void) { - return 0; } #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c0ed1c056f29..95518e628794 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -30,64 +30,23 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H -struct notifier_block; - -extern void rcu_sched_qs(int cpu); -extern void rcu_bh_qs(int cpu); extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); +extern void rcu_cpu_stall_reset(void); #ifdef CONFIG_TREE_PREEMPT_RCU -extern void __rcu_read_lock(void); -extern void __rcu_read_unlock(void); -extern void synchronize_rcu(void); extern void exit_rcu(void); -/* - * Defined as macro as it is a very low level header - * included from areas that don't even know about current - */ -#define rcu_preempt_depth() (current->rcu_read_lock_nesting) - #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -static inline void __rcu_read_lock(void) -{ - preempt_disable(); -} - -static inline void __rcu_read_unlock(void) -{ - preempt_enable(); -} - -#define synchronize_rcu synchronize_sched - static inline void exit_rcu(void) { } -static inline int rcu_preempt_depth(void) -{ - return 0; -} - #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ -static inline void __rcu_read_lock_bh(void) -{ - local_bh_disable(); -} -static inline 
void __rcu_read_unlock_bh(void) -{ - local_bh_enable(); -} - -extern void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)); extern void synchronize_rcu_bh(void); -extern void synchronize_sched(void); extern void synchronize_rcu_expedited(void); static inline void synchronize_rcu_bh_expedited(void) @@ -95,7 +54,7 @@ static inline void synchronize_rcu_bh_expedited(void) synchronize_sched_expedited(); } -extern void rcu_check_callbacks(int cpu, int user); +extern void rcu_barrier(void); extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); @@ -104,18 +63,6 @@ extern void rcu_force_quiescent_state(void); extern void rcu_bh_force_quiescent_state(void); extern void rcu_sched_force_quiescent_state(void); -#ifdef CONFIG_NO_HZ -void rcu_enter_nohz(void); -void rcu_exit_nohz(void); -#else /* CONFIG_NO_HZ */ -static inline void rcu_enter_nohz(void) -{ -} -static inline void rcu_exit_nohz(void) -{ -} -#endif /* CONFIG_NO_HZ */ - /* A context switch is a grace period for RCU-sched and RCU-bh. */ static inline int rcu_blocking_is_gp(void) { diff --git a/include/linux/rds.h b/include/linux/rds.h index 24bce3ded9ea..91950950aa59 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -36,15 +36,6 @@ #include <linux/types.h> -/* These sparse annotated types shouldn't be in any user - * visible header file. We should clean this up rather - * than kludging around them. 
*/ -#ifndef __KERNEL__ -#define __be16 u_int16_t -#define __be32 u_int32_t -#define __be64 u_int64_t -#endif - #define RDS_IB_ABI_VERSION 0x301 /* @@ -82,6 +73,10 @@ #define RDS_CMSG_RDMA_MAP 3 #define RDS_CMSG_RDMA_STATUS 4 #define RDS_CMSG_CONG_UPDATE 5 +#define RDS_CMSG_ATOMIC_FADD 6 +#define RDS_CMSG_ATOMIC_CSWP 7 +#define RDS_CMSG_MASKED_ATOMIC_FADD 8 +#define RDS_CMSG_MASKED_ATOMIC_CSWP 9 #define RDS_INFO_FIRST 10000 #define RDS_INFO_COUNTERS 10000 @@ -98,9 +93,9 @@ #define RDS_INFO_LAST 10010 struct rds_info_counter { - u_int8_t name[32]; - u_int64_t value; -} __packed; + uint8_t name[32]; + uint64_t value; +} __attribute__((packed)); #define RDS_INFO_CONNECTION_FLAG_SENDING 0x01 #define RDS_INFO_CONNECTION_FLAG_CONNECTING 0x02 @@ -109,56 +104,48 @@ struct rds_info_counter { #define TRANSNAMSIZ 16 struct rds_info_connection { - u_int64_t next_tx_seq; - u_int64_t next_rx_seq; - __be32 laddr; - __be32 faddr; - u_int8_t transport[TRANSNAMSIZ]; /* null term ascii */ - u_int8_t flags; -} __packed; - -struct rds_info_flow { + uint64_t next_tx_seq; + uint64_t next_rx_seq; __be32 laddr; __be32 faddr; - u_int32_t bytes; - __be16 lport; - __be16 fport; -} __packed; + uint8_t transport[TRANSNAMSIZ]; /* null term ascii */ + uint8_t flags; +} __attribute__((packed)); #define RDS_INFO_MESSAGE_FLAG_ACK 0x01 #define RDS_INFO_MESSAGE_FLAG_FAST_ACK 0x02 struct rds_info_message { - u_int64_t seq; - u_int32_t len; + uint64_t seq; + uint32_t len; __be32 laddr; __be32 faddr; __be16 lport; __be16 fport; - u_int8_t flags; -} __packed; + uint8_t flags; +} __attribute__((packed)); struct rds_info_socket { - u_int32_t sndbuf; + uint32_t sndbuf; __be32 bound_addr; __be32 connected_addr; __be16 bound_port; __be16 connected_port; - u_int32_t rcvbuf; - u_int64_t inum; -} __packed; + uint32_t rcvbuf; + uint64_t inum; +} __attribute__((packed)); struct rds_info_tcp_socket { __be32 local_addr; __be16 local_port; __be32 peer_addr; __be16 peer_port; - u_int64_t hdr_rem; - u_int64_t data_rem; - 
u_int32_t last_sent_nxt; - u_int32_t last_expected_una; - u_int32_t last_seen_una; -} __packed; + uint64_t hdr_rem; + uint64_t data_rem; + uint32_t last_sent_nxt; + uint32_t last_expected_una; + uint32_t last_seen_una; +} __attribute__((packed)); #define RDS_IB_GID_LEN 16 struct rds_info_rdma_connection { @@ -212,42 +199,69 @@ struct rds_info_rdma_connection { * (so that the application does not have to worry about * alignment). */ -typedef u_int64_t rds_rdma_cookie_t; +typedef uint64_t rds_rdma_cookie_t; struct rds_iovec { - u_int64_t addr; - u_int64_t bytes; + uint64_t addr; + uint64_t bytes; }; struct rds_get_mr_args { struct rds_iovec vec; - u_int64_t cookie_addr; + uint64_t cookie_addr; uint64_t flags; }; struct rds_get_mr_for_dest_args { struct sockaddr_storage dest_addr; struct rds_iovec vec; - u_int64_t cookie_addr; + uint64_t cookie_addr; uint64_t flags; }; struct rds_free_mr_args { rds_rdma_cookie_t cookie; - u_int64_t flags; + uint64_t flags; }; struct rds_rdma_args { rds_rdma_cookie_t cookie; struct rds_iovec remote_vec; - u_int64_t local_vec_addr; - u_int64_t nr_local; - u_int64_t flags; - u_int64_t user_token; + uint64_t local_vec_addr; + uint64_t nr_local; + uint64_t flags; + uint64_t user_token; +}; + +struct rds_atomic_args { + rds_rdma_cookie_t cookie; + uint64_t local_addr; + uint64_t remote_addr; + union { + struct { + uint64_t compare; + uint64_t swap; + } cswp; + struct { + uint64_t add; + } fadd; + struct { + uint64_t compare; + uint64_t swap; + uint64_t compare_mask; + uint64_t swap_mask; + } m_cswp; + struct { + uint64_t add; + uint64_t nocarry_mask; + } m_fadd; + }; + uint64_t flags; + uint64_t user_token; }; struct rds_rdma_notify { - u_int64_t user_token; + uint64_t user_token; int32_t status; }; @@ -266,5 +280,6 @@ struct rds_rdma_notify { #define RDS_RDMA_USE_ONCE 0x0008 /* free MR after use */ #define RDS_RDMA_DONTWAIT 0x0010 /* Don't wait in SET_BARRIER */ #define RDS_RDMA_NOTIFY_ME 0x0020 /* Notify when operation completes */ 
+#define RDS_RDMA_SILENT 0x0040 /* Do not interrupt remote */ #endif /* IB_RDS_H */ diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h index bc8c3881c729..f31db2368782 100644 --- a/include/linux/resume-trace.h +++ b/include/linux/resume-trace.h @@ -3,6 +3,7 @@ #ifdef CONFIG_PM_TRACE #include <asm/resume-trace.h> +#include <linux/types.h> extern int pm_trace_enabled; @@ -14,6 +15,7 @@ static inline int pm_trace_is_enabled(void) struct device; extern void set_trace_device(struct device *); extern void generate_resume_trace(const void *tracedata, unsigned int user); +extern int show_trace_dev_match(char *buf, size_t size); #define TRACE_DEVICE(dev) do { \ if (pm_trace_enabled) \ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 58d44491880f..d42f274418b8 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -6,6 +6,7 @@ #include <linux/if_link.h> #include <linux/if_addr.h> #include <linux/neighbour.h> +#include <linux/netdevice.h> /* rtnetlink families. Values up to 127 are reserved for real address * families, values above 128 may be used arbitrarily. @@ -749,6 +750,35 @@ extern int rtnl_is_locked(void); extern int lockdep_rtnl_is_held(void); #endif /* #ifdef CONFIG_PROVE_LOCKING */ +/** + * rcu_dereference_rtnl - rcu_dereference with debug checking + * @p: The pointer to read, prior to dereferencing + * + * Do an rcu_dereference(p), but check caller either holds rcu_read_lock() + * or RTNL. Note : Please prefer rtnl_dereference() or rcu_dereference() + */ +#define rcu_dereference_rtnl(p) \ + rcu_dereference_check(p, rcu_read_lock_held() || \ + lockdep_rtnl_is_held()) + +/** + * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL + * @p: The pointer to read, prior to dereferencing + * + * Return the value of the specified RCU-protected pointer, but omit + * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because + * caller holds RTNL. 
+ */ +#define rtnl_dereference(p) \ + rcu_dereference_protected(p, lockdep_rtnl_is_held()) + +static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) +{ + return rtnl_dereference(dev->ingress_queue); +} + +extern struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); + extern void rtnetlink_init(void); extern void __rtnl_unlock(void); diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7dd..56154bbb8da9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -336,6 +336,9 @@ extern unsigned long sysctl_hung_task_warnings; extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +#else +/* Avoid need for ifdefs elsewhere in the code */ +enum { sysctl_hung_task_timeout_secs = 0 }; #endif /* Attach to any functions which should be ignored in wchan output. */ @@ -875,6 +878,7 @@ enum sched_domain_level { SD_LV_NONE = 0, SD_LV_SIBLING, SD_LV_MC, + SD_LV_BOOK, SD_LV_CPU, SD_LV_NODE, SD_LV_ALLNODES, @@ -1160,6 +1164,13 @@ struct sched_rt_entity { struct rcu_node; +enum perf_event_task_context { + perf_invalid_context = -1, + perf_hw_context = 0, + perf_sw_context, + perf_nr_task_contexts, +}; + struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; @@ -1202,11 +1213,13 @@ struct task_struct { unsigned int policy; cpumask_t cpus_allowed; -#ifdef CONFIG_TREE_PREEMPT_RCU +#ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; char rcu_read_unlock_special; - struct rcu_node *rcu_blocked_node; struct list_head rcu_node_entry; +#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TREE_PREEMPT_RCU + struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) @@ -1288,9 +1301,9 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - const struct cred *real_cred; /* objective and real 
subjective task + const struct cred __rcu *real_cred; /* objective and real subjective task * credentials (COW) */ - const struct cred *cred; /* effective (overridable) subjective task + const struct cred __rcu *cred; /* effective (overridable) subjective task * credentials (COW) */ struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations @@ -1418,7 +1431,7 @@ struct task_struct { #endif #ifdef CONFIG_CGROUPS /* Control Group info protected by css_set_lock */ - struct css_set *cgroups; + struct css_set __rcu *cgroups; /* cg_list protected by css_set_lock and tsk->alloc_lock */ struct list_head cg_list; #endif @@ -1431,7 +1444,7 @@ struct task_struct { struct futex_pi_state *pi_state_cache; #endif #ifdef CONFIG_PERF_EVENTS - struct perf_event_context *perf_event_ctxp; + struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; struct mutex perf_event_mutex; struct list_head perf_event_list; #endif @@ -1681,8 +1694,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * /* * Per process flags */ -#define PF_ALIGNWARN 0x00000001 /* Print alignment warning msgs */ - /* Not implemented yet, only for 486*/ +#define PF_KSOFTIRQD 0x00000001 /* I am ksoftirqd */ #define PF_STARTING 0x00000002 /* being created */ #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ @@ -1740,7 +1752,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) -#ifdef CONFIG_TREE_PREEMPT_RCU +#ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. 
*/ @@ -1749,7 +1761,9 @@ static inline void rcu_copy_process(struct task_struct *p) { p->rcu_read_lock_nesting = 0; p->rcu_read_unlock_special = 0; +#ifdef CONFIG_TREE_PREEMPT_RCU p->rcu_blocked_node = NULL; +#endif INIT_LIST_HEAD(&p->rcu_node_entry); } @@ -1826,6 +1840,19 @@ extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); #endif +#ifdef CONFIG_IRQ_TIME_ACCOUNTING +/* + * An i/f to runtime opt-in for irq time accounting based off of sched_clock. + * The reason for this explicit opt-in is not to have perf penalty with + * slow sched_clocks. + */ +extern void enable_sched_clock_irqtime(void); +extern void disable_sched_clock_irqtime(void); +#else +static inline void enable_sched_clock_irqtime(void) {} +static inline void disable_sched_clock_irqtime(void) {} +#endif + extern unsigned long long task_sched_runtime(struct task_struct *task); extern unsigned long long thread_group_sched_runtime(struct task_struct *task); @@ -2367,9 +2394,9 @@ extern int __cond_resched_lock(spinlock_t *lock); extern int __cond_resched_softirq(void); -#define cond_resched_softirq() ({ \ - __might_sleep(__FILE__, __LINE__, SOFTIRQ_OFFSET); \ - __cond_resched_softirq(); \ +#define cond_resched_softirq() ({ \ + __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ + __cond_resched_softirq(); \ }) /* diff --git a/include/linux/security.h b/include/linux/security.h index a22219afff09..b8246a8df7d2 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -74,7 +74,7 @@ extern int cap_file_mmap(struct file *file, unsigned long reqprot, extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); -extern int cap_task_setscheduler(struct task_struct *p, int policy, struct sched_param *lp); +extern int cap_task_setscheduler(struct task_struct *p); extern int 
cap_task_setioprio(struct task_struct *p, int ioprio); extern int cap_task_setnice(struct task_struct *p, int nice); extern int cap_syslog(int type, bool from_file); @@ -959,6 +959,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Sets the new child socket's sid to the openreq sid. * @inet_conn_established: * Sets the connection's peersid to the secmark on skb. + * @secmark_relabel_packet: + * Check if the process should be allowed to relabel packets to the given secid + * @secmark_refcount_inc: + * Tells the LSM to increment the number of secmark labeling rules loaded + * @secmark_refcount_dec: + * Tells the LSM to decrement the number of secmark labeling rules loaded * @req_classify_flow: * Sets the flow's sid to the openreq sid. * @tun_dev_create: @@ -1279,9 +1285,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Return 0 if permission is granted. * * @secid_to_secctx: - * Convert secid to security context. + * Convert secid to security context. If secdata is NULL the length of + * the result will be returned in seclen, but no secdata will be returned. + * This does mean that the length could change between calls to check the + * length and the next call which actually allocates and returns the secdata. * @secid contains the security ID. * @secdata contains the pointer that stores the converted security context. + * @seclen pointer which contains the length of the data * @secctx_to_secid: * Convert security context to secid. * @secid contains the pointer to the generated security ID. 
@@ -1501,8 +1511,7 @@ struct security_operations { int (*task_getioprio) (struct task_struct *p); int (*task_setrlimit) (struct task_struct *p, unsigned int resource, struct rlimit *new_rlim); - int (*task_setscheduler) (struct task_struct *p, int policy, - struct sched_param *lp); + int (*task_setscheduler) (struct task_struct *p); int (*task_getscheduler) (struct task_struct *p); int (*task_movememory) (struct task_struct *p); int (*task_kill) (struct task_struct *p, @@ -1594,6 +1603,9 @@ struct security_operations { struct request_sock *req); void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req); void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb); + int (*secmark_relabel_packet) (u32 secid); + void (*secmark_refcount_inc) (void); + void (*secmark_refcount_dec) (void); void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl); int (*tun_dev_create)(void); void (*tun_dev_post_create)(struct sock *sk); @@ -1752,8 +1764,7 @@ int security_task_setioprio(struct task_struct *p, int ioprio); int security_task_getioprio(struct task_struct *p); int security_task_setrlimit(struct task_struct *p, unsigned int resource, struct rlimit *new_rlim); -int security_task_setscheduler(struct task_struct *p, - int policy, struct sched_param *lp); +int security_task_setscheduler(struct task_struct *p); int security_task_getscheduler(struct task_struct *p); int security_task_movememory(struct task_struct *p); int security_task_kill(struct task_struct *p, struct siginfo *info, @@ -2320,11 +2331,9 @@ static inline int security_task_setrlimit(struct task_struct *p, return 0; } -static inline int security_task_setscheduler(struct task_struct *p, - int policy, - struct sched_param *lp) +static inline int security_task_setscheduler(struct task_struct *p) { - return cap_task_setscheduler(p, policy, lp); + return cap_task_setscheduler(p); } static inline int security_task_getscheduler(struct task_struct *p) @@ -2551,6 +2560,9 @@ void 
security_inet_csk_clone(struct sock *newsk, const struct request_sock *req); void security_inet_conn_established(struct sock *sk, struct sk_buff *skb); +int security_secmark_relabel_packet(u32 secid); +void security_secmark_refcount_inc(void); +void security_secmark_refcount_dec(void); int security_tun_dev_create(void); void security_tun_dev_post_create(struct sock *sk); int security_tun_dev_attach(struct sock *sk); @@ -2705,6 +2717,19 @@ static inline void security_inet_conn_established(struct sock *sk, { } +static inline int security_secmark_relabel_packet(u32 secid) +{ + return 0; +} + +static inline void security_secmark_refcount_inc(void) +{ +} + +static inline void security_secmark_refcount_dec(void) +{ +} + static inline int security_tun_dev_create(void) { return 0; diff --git a/include/linux/selection.h b/include/linux/selection.h index 8cdaa1151d2e..85193aa8c1e3 100644 --- a/include/linux/selection.h +++ b/include/linux/selection.h @@ -39,5 +39,6 @@ extern void putconsxy(struct vc_data *vc, unsigned char *p); extern u16 vcs_scr_readw(struct vc_data *vc, const u16 *org); extern void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org); +extern void vcs_scr_updated(struct vc_data *vc); #endif diff --git a/include/linux/selinux.h b/include/linux/selinux.h index 82e0f26a1299..44f459612690 100644 --- a/include/linux/selinux.h +++ b/include/linux/selinux.h @@ -21,74 +21,11 @@ struct kern_ipc_perm; #ifdef CONFIG_SECURITY_SELINUX /** - * selinux_string_to_sid - map a security context string to a security ID - * @str: the security context string to be mapped - * @sid: ID value returned via this. - * - * Returns 0 if successful, with the SID stored in sid. A value - * of zero for sid indicates no SID could be determined (but no error - * occurred). 
- */ -int selinux_string_to_sid(char *str, u32 *sid); - -/** - * selinux_secmark_relabel_packet_permission - secmark permission check - * @sid: SECMARK ID value to be applied to network packet - * - * Returns 0 if the current task is allowed to set the SECMARK label of - * packets with the supplied security ID. Note that it is implicit that - * the packet is always being relabeled from the default unlabeled value, - * and that the access control decision is made in the AVC. - */ -int selinux_secmark_relabel_packet_permission(u32 sid); - -/** - * selinux_secmark_refcount_inc - increments the secmark use counter - * - * SELinux keeps track of the current SECMARK targets in use so it knows - * when to apply SECMARK label access checks to network packets. This - * function incements this reference count to indicate that a new SECMARK - * target has been configured. - */ -void selinux_secmark_refcount_inc(void); - -/** - * selinux_secmark_refcount_dec - decrements the secmark use counter - * - * SELinux keeps track of the current SECMARK targets in use so it knows - * when to apply SECMARK label access checks to network packets. This - * function decements this reference count to indicate that one of the - * existing SECMARK targets has been removed/flushed. - */ -void selinux_secmark_refcount_dec(void); - -/** * selinux_is_enabled - is SELinux enabled? 
*/ bool selinux_is_enabled(void); #else -static inline int selinux_string_to_sid(const char *str, u32 *sid) -{ - *sid = 0; - return 0; -} - -static inline int selinux_secmark_relabel_packet_permission(u32 sid) -{ - return 0; -} - -static inline void selinux_secmark_refcount_inc(void) -{ - return; -} - -static inline void selinux_secmark_refcount_dec(void) -{ - return; -} - static inline bool selinux_is_enabled(void) { return false; diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 7638deaaba65..97f5b45bbc07 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -35,6 +35,8 @@ struct plat_serial8250_port { void (*set_termios)(struct uart_port *, struct ktermios *new, struct ktermios *old); + void (*pm)(struct uart_port *, unsigned int state, + unsigned old); }; /* @@ -76,5 +78,11 @@ extern int serial8250_find_port_for_earlycon(void); extern int setup_early_serial8250_console(char *cmdline); extern void serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old); +extern void serial8250_do_pm(struct uart_port *port, unsigned int state, + unsigned int oldstate); + +extern void serial8250_set_isa_configurator(void (*v) + (int port, struct uart_port *up, + unsigned short *capabilities)); #endif diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 563e23400913..99e5994e6f84 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -289,6 +289,8 @@ struct uart_port { void (*set_termios)(struct uart_port *, struct ktermios *new, struct ktermios *old); + void (*pm)(struct uart_port *, unsigned int state, + unsigned int old); unsigned int irq; /* irq number */ unsigned long irqflags; /* irq flags */ unsigned int uartclk; /* base uart clock */ @@ -411,6 +413,14 @@ unsigned int uart_get_baud_rate(struct uart_port *port, struct ktermios *termios unsigned int max); unsigned int uart_get_divisor(struct uart_port *port, unsigned int baud); +/* Base 
timer interval for polling */ +static inline int uart_poll_timeout(struct uart_port *port) +{ + int timeout = port->timeout; + + return timeout > 6 ? (timeout / 2 - 2) : 1; +} + /* * Console helpers. */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 77eb60d2b496..e6ba898de61c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -129,8 +129,13 @@ typedef struct skb_frag_struct skb_frag_t; struct skb_frag_struct { struct page *page; +#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) __u32 page_offset; __u32 size; +#else + __u16 page_offset; + __u16 size; +#endif }; #define HAVE_HW_TIME_STAMP @@ -163,26 +168,19 @@ struct skb_shared_hwtstamps { ktime_t syststamp; }; -/** - * struct skb_shared_tx - instructions for time stamping of outgoing packets - * @hardware: generate hardware time stamp - * @software: generate software time stamp - * @in_progress: device driver is going to provide - * hardware time stamp - * @prevent_sk_orphan: make sk reference available on driver level - * @flags: all shared_tx flags - * - * These flags are attached to packets as part of the - * &skb_shared_info. Use skb_tx() to get a pointer. 
- */ -union skb_shared_tx { - struct { - __u8 hardware:1, - software:1, - in_progress:1, - prevent_sk_orphan:1; - }; - __u8 flags; +/* Definitions for tx_flags in struct skb_shared_info */ +enum { + /* generate hardware time stamp */ + SKBTX_HW_TSTAMP = 1 << 0, + + /* generate software time stamp */ + SKBTX_SW_TSTAMP = 1 << 1, + + /* device driver is going to provide hardware time stamp */ + SKBTX_IN_PROGRESS = 1 << 2, + + /* ensure the originating sk reference is available on driver level */ + SKBTX_DRV_NEEDS_SK_REF = 1 << 3, }; /* This data is invariant across clones and lives at @@ -195,7 +193,7 @@ struct skb_shared_info { unsigned short gso_segs; unsigned short gso_type; __be32 ip6_frag_id; - union skb_shared_tx tx_flags; + __u8 tx_flags; struct sk_buff *frag_list; struct skb_shared_hwtstamps hwtstamps; @@ -462,19 +460,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) skb->_skb_refdst = (unsigned long)dst; } -/** - * skb_dst_set_noref - sets skb dst, without a reference - * @skb: buffer - * @dst: dst entry - * - * Sets skb dst, assuming a reference was not taken on dst - * skb_dst_drop() should not dst_release() this dst - */ -static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) -{ - WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); - skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; -} +extern void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst); /** * skb_dst_is_noref - Test if skb dst isnt refcounted @@ -498,13 +484,13 @@ extern struct sk_buff *__alloc_skb(unsigned int size, static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) { - return __alloc_skb(size, priority, 0, -1); + return __alloc_skb(size, priority, 0, NUMA_NO_NODE); } static inline struct sk_buff *alloc_skb_fclone(unsigned int size, gfp_t priority) { - return __alloc_skb(size, priority, 1, -1); + return __alloc_skb(size, priority, 1, NUMA_NO_NODE); } extern bool skb_recycle_check(struct 
sk_buff *skb, int skb_size); @@ -558,6 +544,15 @@ extern unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config, struct ts_state *state); +extern __u32 __skb_get_rxhash(struct sk_buff *skb); +static inline __u32 skb_get_rxhash(struct sk_buff *skb) +{ + if (!skb->rxhash) + skb->rxhash = __skb_get_rxhash(skb); + + return skb->rxhash; +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { @@ -578,11 +573,6 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) return &skb_shinfo(skb)->hwtstamps; } -static inline union skb_shared_tx *skb_tx(struct sk_buff *skb) -{ - return &skb_shinfo(skb)->tx_flags; -} - /** * skb_queue_empty - check if a queue is empty * @list: queue head @@ -604,7 +594,7 @@ static inline int skb_queue_empty(const struct sk_buff_head *list) static inline bool skb_queue_is_last(const struct sk_buff_head *list, const struct sk_buff *skb) { - return (skb->next == (struct sk_buff *) list); + return skb->next == (struct sk_buff *)list; } /** @@ -617,7 +607,7 @@ static inline bool skb_queue_is_last(const struct sk_buff_head *list, static inline bool skb_queue_is_first(const struct sk_buff_head *list, const struct sk_buff *skb) { - return (skb->prev == (struct sk_buff *) list); + return skb->prev == (struct sk_buff *)list; } /** @@ -1123,7 +1113,7 @@ extern void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, int size); #define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) -#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frags(skb)) +#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frag_list(skb)) #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) #ifdef NET_SKBUFF_DATA_USES_OFFSET @@ -1561,13 +1551,25 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, return skb; } -extern struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask); +/** + * 
__netdev_alloc_page - allocate a page for ps-rx on a specific device + * @dev: network device to receive on + * @gfp_mask: alloc_pages_node mask + * + * Allocate a new page. dev currently unused. + * + * %NULL is returned if there is no free memory. + */ +static inline struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask) +{ + return alloc_pages_node(NUMA_NO_NODE, gfp_mask, 0); +} /** * netdev_alloc_page - allocate a page for ps-rx on a specific device * @dev: network device to receive on * - * Allocate a new page node local to the specified device. + * Allocate a new page. dev currently unused. * * %NULL is returned if there is no free memory. */ @@ -1787,7 +1789,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) skb = skb->prev) -static inline bool skb_has_frags(const struct sk_buff *skb) +static inline bool skb_has_frag_list(const struct sk_buff *skb) { return skb_shinfo(skb)->frag_list != NULL; } @@ -1987,8 +1989,8 @@ extern void skb_tstamp_tx(struct sk_buff *orig_skb, static inline void sw_tx_timestamp(struct sk_buff *skb) { - union skb_shared_tx *shtx = skb_tx(skb); - if (shtx->software && !shtx->in_progress) + if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP && + !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) skb_tstamp_tx(skb, NULL); } @@ -2159,7 +2161,7 @@ static inline u16 skb_get_rx_queue(const struct sk_buff *skb) static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) { - return (skb->queue_mapping != 0); + return skb->queue_mapping != 0; } extern u16 skb_tx_hash(const struct net_device *dev, @@ -2209,6 +2211,21 @@ static inline void skb_forward_csum(struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; } +/** + * skb_checksum_none_assert - make sure skb ip_summed is CHECKSUM_NONE + * @skb: skb to check + * + * fresh skbs have their ip_summed set to CHECKSUM_NONE. + * Instead of forcing ip_summed to CHECKSUM_NONE, we can + * use this helper, to document places where we make this assertion. 
+ */ +static inline void skb_checksum_none_assert(struct sk_buff *skb) +{ +#ifdef DEBUG + BUG_ON(skb->ip_summed != CHECKSUM_NONE); +#endif +} + bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h index 2ea1dd1ba21c..291f721144c2 100644 --- a/include/linux/smp_lock.h +++ b/include/linux/smp_lock.h @@ -54,12 +54,15 @@ static inline void cycle_kernel_lock(void) #else +#ifdef CONFIG_BKL /* provoke build bug if not set */ #define lock_kernel() #define unlock_kernel() -#define release_kernel_lock(task) do { } while(0) #define cycle_kernel_lock() do { } while(0) -#define reacquire_kernel_lock(task) 0 #define kernel_locked() 1 +#endif /* CONFIG_BKL */ + +#define release_kernel_lock(task) do { } while(0) +#define reacquire_kernel_lock(task) 0 #endif /* CONFIG_LOCK_KERNEL */ #endif /* __LINUX_SMPLOCK_H */ diff --git a/include/linux/socket.h b/include/linux/socket.h index a8f56e1ec760..5146b50202ce 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -326,7 +326,6 @@ extern long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *a extern int memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len); extern int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata, int offset, int len); -extern int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, int __user *ulen); extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index f8854655860e..80e535897de6 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -50,6 +50,7 @@ #include <linux/preempt.h> #include <linux/linkage.h> #include <linux/compiler.h> +#include <linux/irqflags.h> #include <linux/thread_info.h> #include <linux/kernel.h> 
#include <linux/stringify.h> diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4d5d2f546dbf..58971e891f48 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -108,19 +108,43 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ /** - * srcu_dereference - fetch SRCU-protected pointer with checking + * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing + * @p: the pointer to fetch and protect for later dereferencing + * @sp: pointer to the srcu_struct, which is used to check that we + * really are in an SRCU read-side critical section. + * @c: condition to check for update-side use * - * Makes rcu_dereference_check() do the dirty work. + * If PROVE_RCU is enabled, invoking this outside of an RCU read-side + * critical section will result in an RCU-lockdep splat, unless @c evaluates + * to 1. The @c argument will normally be a logical expression containing + * lockdep_is_held() calls. */ -#define srcu_dereference(p, sp) \ - rcu_dereference_check(p, srcu_read_lock_held(sp)) +#define srcu_dereference_check(p, sp, c) \ + __rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu) + +/** + * srcu_dereference - fetch SRCU-protected pointer for later dereferencing + * @p: the pointer to fetch and protect for later dereferencing + * @sp: pointer to the srcu_struct, which is used to check that we + * really are in an SRCU read-side critical section. + * + * Makes rcu_dereference_check() do the dirty work. If PROVE_RCU + * is enabled, invoking this outside of an RCU read-side critical + * section will result in an RCU-lockdep splat. + */ +#define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0) /** * srcu_read_lock - register a new reader for an SRCU-protected structure. * @sp: srcu_struct in which to register the new reader. * * Enter an SRCU read-side critical section. Note that SRCU read-side - * critical sections may be nested. 
+ * critical sections may be nested. However, it is illegal to + * call anything that waits on an SRCU grace period for the same + * srcu_struct, whether directly or indirectly. Please note that + * one way to indirectly wait on an SRCU grace period is to acquire + * a mutex that is held elsewhere while calling synchronize_srcu() or + * synchronize_srcu_expedited(). */ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) { diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index a6d5225b9275..11daf9c140e7 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -97,6 +97,7 @@ #define SSB_TMSLOW_RESET 0x00000001 /* Reset */ #define SSB_TMSLOW_REJECT_22 0x00000002 /* Reject (Backplane rev 2.2) */ #define SSB_TMSLOW_REJECT_23 0x00000004 /* Reject (Backplane rev 2.3) */ +#define SSB_TMSLOW_PHYCLK 0x00000010 /* MAC PHY Clock Control Enable */ #define SSB_TMSLOW_CLOCK 0x00010000 /* Clock Enable */ #define SSB_TMSLOW_FGC 0x00020000 /* Force Gated Clocks On */ #define SSB_TMSLOW_PE 0x40000000 /* Power Management Enable */ diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 632ff7c03280..d66c61774d95 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -32,10 +32,14 @@ struct plat_stmmacenet_data { int bus_id; int pbl; + int clk_csr; int has_gmac; int enh_desc; + int tx_coe; + int bugged_jumbo; + int pmt; void (*fix_mac_speed)(void *priv, unsigned int speed); - void (*bus_setup)(unsigned long ioaddr); + void (*bus_setup)(void __iomem *ioaddr); #ifdef CONFIG_STM_DRIVERS struct stm_pad_config *pad_config; #endif diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 6b524a0d02e4..1808960c5059 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -126,8 +126,8 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); #else /* CONFIG_STOP_MACHINE && CONFIG_SMP */ -static inline int stop_machine(int (*fn)(void *), 
void *data, - const struct cpumask *cpus) +static inline int __stop_machine(int (*fn)(void *), void *data, + const struct cpumask *cpus) { int ret; local_irq_disable(); @@ -136,5 +136,11 @@ static inline int stop_machine(int (*fn)(void *), void *data, return ret; } +static inline int stop_machine(int (*fn)(void *), void *data, + const struct cpumask *cpus) +{ + return __stop_machine(fn, data, cpus); +} + #endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */ #endif /* _LINUX_STOP_MACHINE */ diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 671538d25bc1..8eee9dbbfe7a 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -69,7 +69,7 @@ struct gss_cl_ctx { enum rpc_gss_proc gc_proc; u32 gc_seq; spinlock_t gc_seq_lock; - struct gss_ctx *gc_gss_ctx; + struct gss_ctx __rcu *gc_gss_ctx; struct xdr_netobj gc_wire_ctx; u32 gc_win; unsigned long gc_expiry; @@ -80,7 +80,7 @@ struct gss_upcall_msg; struct gss_cred { struct rpc_cred gc_base; enum rpc_gss_svc gc_service; - struct gss_cl_ctx *gc_ctx; + struct gss_cl_ctx __rcu *gc_ctx; struct gss_upcall_msg *gc_upcall; unsigned long gc_upcall_timestamp; unsigned char gc_machine_cred : 1; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 4af270ec2204..26697514c5ec 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -293,8 +293,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb); extern bool events_check_enabled; extern bool pm_check_wakeup_events(void); -extern bool pm_get_wakeup_count(unsigned long *count); -extern bool pm_save_wakeup_count(unsigned long count); +extern bool pm_get_wakeup_count(unsigned int *count); +extern bool pm_save_wakeup_count(unsigned int count); #else /* !CONFIG_PM_SLEEP */ static inline int register_pm_notifier(struct notifier_block *nb) @@ -308,6 +308,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) } #define pm_notifier(fn, pri) do { (void)(fn); } while (0) + +static 
inline bool pm_check_wakeup_events(void) { return true; } #endif /* !CONFIG_PM_SLEEP */ extern struct mutex pm_mutex; diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 96eb576d82fd..30b881555fa5 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -164,6 +164,10 @@ int sysfs_add_file_to_group(struct kobject *kobj, const struct attribute *attr, const char *group); void sysfs_remove_file_from_group(struct kobject *kobj, const struct attribute *attr, const char *group); +int sysfs_merge_group(struct kobject *kobj, + const struct attribute_group *grp); +void sysfs_unmerge_group(struct kobject *kobj, + const struct attribute_group *grp); void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr); void sysfs_notify_dirent(struct sysfs_dirent *sd); @@ -302,6 +306,17 @@ static inline void sysfs_remove_file_from_group(struct kobject *kobj, { } +static inline int sysfs_merge_group(struct kobject *kobj, + const struct attribute_group *grp) +{ + return 0; +} + +static inline void sysfs_unmerge_group(struct kobject *kobj, + const struct attribute_group *grp) +{ +} + static inline void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr) { diff --git a/include/linux/tc_act/Kbuild b/include/linux/tc_act/Kbuild index 76990937f4c9..67b501c302b2 100644 --- a/include/linux/tc_act/Kbuild +++ b/include/linux/tc_act/Kbuild @@ -4,3 +4,4 @@ header-y += tc_mirred.h header-y += tc_pedit.h header-y += tc_nat.h header-y += tc_skbedit.h +header-y += tc_csum.h diff --git a/include/linux/tc_act/tc_csum.h b/include/linux/tc_act/tc_csum.h new file mode 100644 index 000000000000..a047c49a3153 --- /dev/null +++ b/include/linux/tc_act/tc_csum.h @@ -0,0 +1,32 @@ +#ifndef __LINUX_TC_CSUM_H +#define __LINUX_TC_CSUM_H + +#include <linux/types.h> +#include <linux/pkt_cls.h> + +#define TCA_ACT_CSUM 16 + +enum { + TCA_CSUM_UNSPEC, + TCA_CSUM_PARMS, + TCA_CSUM_TM, + __TCA_CSUM_MAX +}; +#define TCA_CSUM_MAX (__TCA_CSUM_MAX - 1) + +enum { + 
TCA_CSUM_UPDATE_FLAG_IPV4HDR = 1, + TCA_CSUM_UPDATE_FLAG_ICMP = 2, + TCA_CSUM_UPDATE_FLAG_IGMP = 4, + TCA_CSUM_UPDATE_FLAG_TCP = 8, + TCA_CSUM_UPDATE_FLAG_UDP = 16, + TCA_CSUM_UPDATE_FLAG_UDPLITE = 32 +}; + +struct tc_csum { + tc_gen; + + __u32 update_flags; +}; + +#endif /* __LINUX_TC_CSUM_H */ diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h index 0864206ec1a3..7138962664f8 100644 --- a/include/linux/tc_ematch/tc_em_meta.h +++ b/include/linux/tc_ematch/tc_em_meta.h @@ -79,6 +79,7 @@ enum { TCF_META_ID_SK_SENDMSG_OFF, TCF_META_ID_SK_WRITE_PENDING, TCF_META_ID_VLAN_TAG, + TCF_META_ID_RXHASH, __TCF_META_ID_MAX }; #define TCF_META_ID_MAX (__TCF_META_ID_MAX - 1) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a778ee024590..e64f4c67d0ef 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -105,6 +105,7 @@ enum { #define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ #define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ #define TCP_THIN_DUPACK 17 /* Fast retrans. 
after 1 dupack */ +#define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ /* for TCP_INFO socket option */ #define TCPI_OPT_TIMESTAMPS 1 diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index a8cc4e13434c..c90696544176 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -23,12 +23,12 @@ struct restart_block { }; /* For futex_wait and futex_wait_requeue_pi */ struct { - u32 *uaddr; + u32 __user *uaddr; u32 val; u32 flags; u32 bitset; u64 time; - u32 *uaddr2; + u32 __user *uaddr2; } futex; /* For nanosleep */ struct { diff --git a/include/linux/tipc.h b/include/linux/tipc.h index 181c8d0e6f73..d10614b29d59 100644 --- a/include/linux/tipc.h +++ b/include/linux/tipc.h @@ -127,17 +127,23 @@ static inline unsigned int tipc_node(__u32 addr) * TIPC topology subscription service definitions */ -#define TIPC_SUB_SERVICE 0x00 /* Filter for service availability */ -#define TIPC_SUB_PORTS 0x01 /* Filter for port availability */ -#define TIPC_SUB_CANCEL 0x04 /* Cancel a subscription */ +#define TIPC_SUB_PORTS 0x01 /* filter for port availability */ +#define TIPC_SUB_SERVICE 0x02 /* filter for service availability */ +#define TIPC_SUB_CANCEL 0x04 /* cancel a subscription */ +#if 0 +/* The following filter options are not currently implemented */ +#define TIPC_SUB_NO_BIND_EVTS 0x04 /* filter out "publish" events */ +#define TIPC_SUB_NO_UNBIND_EVTS 0x08 /* filter out "withdraw" events */ +#define TIPC_SUB_SINGLE_EVT 0x10 /* expire after first event */ +#endif #define TIPC_WAIT_FOREVER ~0 /* timeout for permanent subscription */ struct tipc_subscr { - struct tipc_name_seq seq; /* NBO. Name sequence of interest */ - __u32 timeout; /* NBO. Subscription duration (in ms) */ - __u32 filter; /* NBO. Bitmask of filter options */ - char usr_handle[8]; /* Opaque. 
Available for subscriber use */ + struct tipc_name_seq seq; /* name sequence of interest */ + __u32 timeout; /* subscription duration (in ms) */ + __u32 filter; /* bitmask of filter options */ + char usr_handle[8]; /* available for subscriber use */ }; #define TIPC_PUBLISHED 1 /* publication event */ @@ -145,11 +151,11 @@ struct tipc_subscr { #define TIPC_SUBSCR_TIMEOUT 3 /* subscription timeout event */ struct tipc_event { - __u32 event; /* NBO. Event type, as defined above */ - __u32 found_lower; /* NBO. Matching name seq instances */ - __u32 found_upper; /* " " " " " */ - struct tipc_portid port; /* NBO. Associated port */ - struct tipc_subscr s; /* Original, associated subscription */ + __u32 event; /* event type */ + __u32 found_lower; /* matching name seq instances */ + __u32 found_upper; /* " " " " */ + struct tipc_portid port; /* associated port */ + struct tipc_subscr s; /* associated subscription */ }; /* diff --git a/include/linux/topology.h b/include/linux/topology.h index 64e084ff5e5c..b91a40e847d2 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -201,6 +201,12 @@ int arch_update_cpu_topology(void); .balance_interval = 64, \ } +#ifdef CONFIG_SCHED_BOOK +#ifndef SD_BOOK_INIT +#error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!! +#endif +#endif /* CONFIG_SCHED_BOOK */ + #ifdef CONFIG_NUMA #ifndef SD_NODE_INIT #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!! 
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 103d1b61aacb..a4a90b6726ce 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -17,6 +17,7 @@ #include <linux/errno.h> #include <linux/types.h> #include <linux/rcupdate.h> +#include <linux/jump_label.h> struct module; struct tracepoint; @@ -145,7 +146,9 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - if (unlikely(__tracepoint_##name.state)) \ + JUMP_LABEL(&__tracepoint_##name.state, do_trace); \ + return; \ +do_trace: \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args)); \ diff --git a/include/linux/tty.h b/include/linux/tty.h index 67d64e6efe7a..86be0cdeb11b 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -256,6 +256,7 @@ struct tty_operations; struct tty_struct { int magic; struct kref kref; + struct device *dev; struct tty_driver *driver; const struct tty_operations *ops; int index; @@ -465,7 +466,7 @@ extern void proc_clear_tty(struct task_struct *p); extern struct tty_struct *get_current_tty(void); extern void tty_default_fops(struct file_operations *fops); extern struct tty_struct *alloc_tty_struct(void); -extern void tty_add_file(struct tty_struct *tty, struct file *file); +extern int tty_add_file(struct tty_struct *tty, struct file *file); extern void free_tty_struct(struct tty_struct *tty); extern void initialize_tty_struct(struct tty_struct *tty, struct tty_driver *driver, int idx); diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index b08677982525..db2d227694da 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -224,6 +224,12 @@ * unless the tty also has a valid tty->termiox pointer. 
* * Optional: Called under the termios lock + * + * int (*get_icount)(struct tty_struct *tty, struct serial_icounter *icount); + * + * Called when the device receives a TIOCGICOUNT ioctl. Passed a kernel + * structure to complete. This method is optional and will only be called + * if provided (otherwise EINVAL will be returned). */ #include <linux/fs.h> @@ -232,6 +238,7 @@ struct tty_struct; struct tty_driver; +struct serial_icounter_struct; struct tty_operations { struct tty_struct * (*lookup)(struct tty_driver *driver, @@ -268,6 +275,8 @@ struct tty_operations { unsigned int set, unsigned int clear); int (*resize)(struct tty_struct *tty, struct winsize *ws); int (*set_termiox)(struct tty_struct *tty, struct termiox *tnew); + int (*get_icount)(struct tty_struct *tty, + struct serial_icounter_struct *icount); #ifdef CONFIG_CONSOLE_POLL int (*poll_init)(struct tty_driver *driver, int line, char *options); int (*poll_get_char)(struct tty_driver *driver, int line); diff --git a/include/linux/types.h b/include/linux/types.h index 01a082f56ef4..357dbc19606f 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -121,7 +121,15 @@ typedef __u64 u_int64_t; typedef __s64 int64_t; #endif -/* this is a special 64bit data type that is 8-byte aligned */ +/* + * aligned_u64 should be used in defining kernel<->userspace ABIs to avoid + * common 32/64-bit compat problems. + * 64-bit values align to 4-byte boundaries on x86_32 (and possibly other + * architectures) and to 8-byte boundaries on 64-bit architetures. The new + * aligned_64 type enforces 8-byte alignment so that structs containing + * aligned_64 values have the same alignment on 32-bit and 64-bit architectures. + * No conversions are necessary between 32-bit user-space and a 64-bit kernel. 
+ */ #define aligned_u64 __u64 __attribute__((aligned(8))) #define aligned_be64 __be64 __attribute__((aligned(8))) #define aligned_le64 __le64 __attribute__((aligned(8))) @@ -178,6 +186,11 @@ typedef __u64 __bitwise __be64; typedef __u16 __bitwise __sum16; typedef __u32 __bitwise __wsum; +/* this is a special 64bit data type that is 8-byte aligned */ +#define __aligned_u64 __u64 __attribute__((aligned(8))) +#define __aligned_be64 __be64 __attribute__((aligned(8))) +#define __aligned_le64 __le64 __attribute__((aligned(8))) + #ifdef __KERNEL__ typedef unsigned __bitwise__ gfp_t; typedef unsigned __bitwise__ fmode_t; diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 5dcc9ff72f69..d6188e5a52df 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -108,7 +108,7 @@ extern void uio_event_notify(struct uio_info *info); /* defines for uio_info->irq */ #define UIO_IRQ_CUSTOM -1 -#define UIO_IRQ_NONE -2 +#define UIO_IRQ_NONE 0 /* defines for uio_mem->memtype */ #define UIO_MEM_NONE 0 diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h index c117a68d04a7..5e86dc771da4 100644 --- a/include/linux/usb/cdc.h +++ b/include/linux/usb/cdc.h @@ -32,6 +32,8 @@ #define USB_CDC_PROTO_EEM 7 +#define USB_CDC_NCM_PROTO_NTB 1 + /*-------------------------------------------------------------------------*/ /* @@ -274,13 +276,13 @@ struct usb_cdc_notification { /* * Class Specific structures and constants * - * CDC NCM parameter structure, CDC NCM subclass 6.2.1 + * CDC NCM NTB parameters structure, CDC NCM subclass 6.2.1 * */ -struct usb_cdc_ncm_ntb_parameter { +struct usb_cdc_ncm_ntb_parameters { __le16 wLength; - __le16 bmNtbFormatSupported; + __le16 bmNtbFormatsSupported; __le32 dwNtbInMaxSize; __le16 wNdpInDivisor; __le16 wNdpInPayloadRemainder; @@ -297,8 +299,8 @@ struct usb_cdc_ncm_ntb_parameter { * CDC NCM transfer headers, CDC NCM subclass 3.2 */ -#define NCM_NTH16_SIGN 0x484D434E /* NCMH */ -#define NCM_NTH32_SIGN 0x686D636E /* 
ncmh */ +#define USB_CDC_NCM_NTH16_SIGN 0x484D434E /* NCMH */ +#define USB_CDC_NCM_NTH32_SIGN 0x686D636E /* ncmh */ struct usb_cdc_ncm_nth16 { __le32 dwSignature; @@ -320,25 +322,78 @@ struct usb_cdc_ncm_nth32 { * CDC NCM datagram pointers, CDC NCM subclass 3.3 */ -#define NCM_NDP16_CRC_SIGN 0x314D434E /* NCM1 */ -#define NCM_NDP16_NOCRC_SIGN 0x304D434E /* NCM0 */ -#define NCM_NDP32_CRC_SIGN 0x316D636E /* ncm1 */ -#define NCM_NDP32_NOCRC_SIGN 0x306D636E /* ncm0 */ +#define USB_CDC_NCM_NDP16_CRC_SIGN 0x314D434E /* NCM1 */ +#define USB_CDC_NCM_NDP16_NOCRC_SIGN 0x304D434E /* NCM0 */ +#define USB_CDC_NCM_NDP32_CRC_SIGN 0x316D636E /* ncm1 */ +#define USB_CDC_NCM_NDP32_NOCRC_SIGN 0x306D636E /* ncm0 */ + +/* 16-bit NCM Datagram Pointer Entry */ +struct usb_cdc_ncm_dpe16 { + __le16 wDatagramIndex; + __le16 wDatagramLength; +} __attribute__((__packed__)); +/* 16-bit NCM Datagram Pointer Table */ struct usb_cdc_ncm_ndp16 { __le32 dwSignature; __le16 wLength; __le16 wNextFpIndex; - __u8 data[0]; + struct usb_cdc_ncm_dpe16 dpe16[0]; } __attribute__ ((packed)); +/* 32-bit NCM Datagram Pointer Entry */ +struct usb_cdc_ncm_dpe32 { + __le32 dwDatagramIndex; + __le32 dwDatagramLength; +} __attribute__((__packed__)); + +/* 32-bit NCM Datagram Pointer Table */ struct usb_cdc_ncm_ndp32 { __le32 dwSignature; __le16 wLength; __le16 wReserved6; - __le32 dwNextFpIndex; + __le32 dwNextNdpIndex; __le32 dwReserved12; - __u8 data[0]; + struct usb_cdc_ncm_dpe32 dpe32[0]; } __attribute__ ((packed)); +/* CDC NCM subclass 3.2.1 and 3.2.2 */ +#define USB_CDC_NCM_NDP16_INDEX_MIN 0x000C +#define USB_CDC_NCM_NDP32_INDEX_MIN 0x0010 + +/* CDC NCM subclass 3.3.3 Datagram Formatting */ +#define USB_CDC_NCM_DATAGRAM_FORMAT_CRC 0x30 +#define USB_CDC_NCM_DATAGRAM_FORMAT_NOCRC 0X31 + +/* CDC NCM subclass 4.2 NCM Communications Interface Protocol Code */ +#define USB_CDC_NCM_PROTO_CODE_NO_ENCAP_COMMANDS 0x00 +#define USB_CDC_NCM_PROTO_CODE_EXTERN_PROTO 0xFE + +/* CDC NCM subclass 5.2.1 NCM Functional 
Descriptor, bmNetworkCapabilities */ +#define USB_CDC_NCM_NCAP_ETH_FILTER (1 << 0) +#define USB_CDC_NCM_NCAP_NET_ADDRESS (1 << 1) +#define USB_CDC_NCM_NCAP_ENCAP_COMMAND (1 << 2) +#define USB_CDC_NCM_NCAP_MAX_DATAGRAM_SIZE (1 << 3) +#define USB_CDC_NCM_NCAP_CRC_MODE (1 << 4) + +/* CDC NCM subclass Table 6-3: NTB Parameter Structure */ +#define USB_CDC_NCM_NTB16_SUPPORTED (1 << 0) +#define USB_CDC_NCM_NTB32_SUPPORTED (1 << 1) + +/* CDC NCM subclass Table 6-3: NTB Parameter Structure */ +#define USB_CDC_NCM_NDP_ALIGN_MIN_SIZE 0x04 +#define USB_CDC_NCM_NTB_MAX_LENGTH 0x1C + +/* CDC NCM subclass 6.2.5 SetNtbFormat */ +#define USB_CDC_NCM_NTB16_FORMAT 0x00 +#define USB_CDC_NCM_NTB32_FORMAT 0x01 + +/* CDC NCM subclass 6.2.7 SetNtbInputSize */ +#define USB_CDC_NCM_NTB_MIN_IN_SIZE 2048 +#define USB_CDC_NCM_NTB_MIN_OUT_SIZE 2048 + +/* CDC NCM subclass 6.2.11 SetCrcMode */ +#define USB_CDC_NCM_CRC_NOT_APPENDED 0x00 +#define USB_CDC_NCM_CRC_APPENDED 0x01 + #endif /* __LINUX_USB_CDC_H */ diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index da2ed77d3e8d..f917bbbc8901 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -123,8 +123,23 @@ #define USB_DEVICE_A_ALT_HNP_SUPPORT 5 /* (otg) other RH port does */ #define USB_DEVICE_DEBUG_MODE 6 /* (special devices only) */ +/* + * New Feature Selectors as added by USB 3.0 + * See USB 3.0 spec Table 9-6 + */ +#define USB_DEVICE_U1_ENABLE 48 /* dev may initiate U1 transition */ +#define USB_DEVICE_U2_ENABLE 49 /* dev may initiate U2 transition */ +#define USB_DEVICE_LTM_ENABLE 50 /* dev may send LTM */ +#define USB_INTRF_FUNC_SUSPEND 0 /* function suspend */ + +#define USB_INTR_FUNC_SUSPEND_OPT_MASK 0xFF00 + #define USB_ENDPOINT_HALT 0 /* IN/OUT will STALL */ +/* Bit array elements as returned by the USB_REQ_GET_STATUS request. 
*/ +#define USB_DEV_STAT_U1_ENABLED 2 /* transition into U1 state */ +#define USB_DEV_STAT_U2_ENABLED 3 /* transition into U2 state */ +#define USB_DEV_STAT_LTM_ENABLED 4 /* Latency tolerance messages */ /** * struct usb_ctrlrequest - SETUP data for a USB device control request @@ -675,6 +690,7 @@ struct usb_bos_descriptor { __u8 bNumDeviceCaps; } __attribute__((packed)); +#define USB_DT_BOS_SIZE 5 /*-------------------------------------------------------------------------*/ /* USB_DT_DEVICE_CAPABILITY: grouped with BOS */ @@ -712,16 +728,56 @@ struct usb_wireless_cap_descriptor { /* Ultra Wide Band */ __u8 bReserved; } __attribute__((packed)); +/* USB 2.0 Extension descriptor */ #define USB_CAP_TYPE_EXT 2 struct usb_ext_cap_descriptor { /* Link Power Management */ __u8 bLength; __u8 bDescriptorType; __u8 bDevCapabilityType; - __u8 bmAttributes; + __le32 bmAttributes; #define USB_LPM_SUPPORT (1 << 1) /* supports LPM */ } __attribute__((packed)); +#define USB_DT_USB_EXT_CAP_SIZE 7 + +/* + * SuperSpeed USB Capability descriptor: Defines the set of SuperSpeed USB + * specific device level capabilities + */ +#define USB_SS_CAP_TYPE 3 +struct usb_ss_cap_descriptor { /* Link Power Management */ + __u8 bLength; + __u8 bDescriptorType; + __u8 bDevCapabilityType; + __u8 bmAttributes; +#define USB_LTM_SUPPORT (1 << 1) /* supports LTM */ + __le16 wSpeedSupported; +#define USB_LOW_SPEED_OPERATION (1) /* Low speed operation */ +#define USB_FULL_SPEED_OPERATION (1 << 1) /* Full speed operation */ +#define USB_HIGH_SPEED_OPERATION (1 << 2) /* High speed operation */ +#define USB_5GBPS_OPERATION (1 << 3) /* Operation at 5Gbps */ + __u8 bFunctionalitySupport; + __u8 bU1devExitLat; + __le16 bU2DevExitLat; +} __attribute__((packed)); + +#define USB_DT_USB_SS_CAP_SIZE 10 + +/* + * Container ID Capability descriptor: Defines the instance unique ID used to + * identify the instance across all operating modes + */ +#define CONTAINER_ID_TYPE 4 +struct usb_ss_container_id_descriptor { + 
__u8 bLength; + __u8 bDescriptorType; + __u8 bDevCapabilityType; + __u8 bReserved; + __u8 ContainerID[16]; /* 128-bit number */ +} __attribute__((packed)); + +#define USB_DT_USB_SS_CONTN_ID_SIZE 20 /*-------------------------------------------------------------------------*/ /* USB_DT_WIRELESS_ENDPOINT_COMP: companion descriptor associated with @@ -808,4 +864,14 @@ enum usb_device_state { */ }; +/*-------------------------------------------------------------------------*/ + +/* + * As per USB compliance update, a device that is actively drawing + * more than 100mA from USB must report itself as bus-powered in + * the GetStatus(DEVICE) call. + * http://compliance.usb.org/index.asp?UpdateFile=Electrical&Format=Standard#34 + */ +#define USB_SELF_POWER_VBUS_MAX_DRAW 100 + #endif /* __LINUX_USB_CH9_H */ diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 617068134ae8..3d29a7dcac2d 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -161,8 +161,6 @@ ep_choose(struct usb_gadget *g, struct usb_endpoint_descriptor *hs, * and by language IDs provided in control requests. * @descriptors: Table of descriptors preceding all function descriptors. * Examples include OTG and vendor-specific descriptors. - * @bind: Called from @usb_add_config() to allocate resources unique to this - * configuration and to call @usb_add_function() for each function used. * @unbind: Reverses @bind; called as a side effect of unregistering the * driver which added this configuration. * @setup: Used to delegate control requests that aren't handled by standard @@ -207,8 +205,7 @@ struct usb_configuration { * we can't restructure things to avoid mismatching... 
*/ - /* configuration management: bind/unbind */ - int (*bind)(struct usb_configuration *); + /* configuration management: unbind/setup */ void (*unbind)(struct usb_configuration *); int (*setup)(struct usb_configuration *, const struct usb_ctrlrequest *); @@ -232,20 +229,24 @@ struct usb_configuration { }; int usb_add_config(struct usb_composite_dev *, - struct usb_configuration *); + struct usb_configuration *, + int (*)(struct usb_configuration *)); /** * struct usb_composite_driver - groups configurations into a gadget * @name: For diagnostics, identifies the driver. + * @iProduct: Used as iProduct override if @dev->iProduct is not set. + * If NULL value of @name is taken. + * @iManufacturer: Used as iManufacturer override if @dev->iManufacturer is + * not set. If NULL a default "<system> <release> with <udc>" value + * will be used. * @dev: Template descriptor for the device, including default device * identifiers. * @strings: tables of strings, keyed by identifiers assigned during bind() * and language IDs provided in control requests - * @bind: (REQUIRED) Used to allocate resources that are shared across the - * whole device, such as string IDs, and add its configurations using - * @usb_add_config(). This may fail by returning a negative errno - * value; it should return zero on successful initialization. - * @unbind: Reverses @bind(); called as a side effect of unregistering + * @needs_serial: set to 1 if the gadget needs userspace to provide + * a serial number. If one is not provided, warning will be printed. + * @unbind: Reverses bind; called as a side effect of unregistering * this driver. * @disconnect: optional driver disconnect method * @suspend: Notifies when the host stops sending USB traffic, @@ -256,7 +257,7 @@ int usb_add_config(struct usb_composite_dev *, * Devices default to reporting self powered operation. Devices which rely * on bus powered operation should report this in their @bind() method. 
* - * Before returning from @bind, various fields in the template descriptor + * Before returning from bind, various fields in the template descriptor * may be overridden. These include the idVendor/idProduct/bcdDevice values * normally to bind the appropriate host side driver, and the three strings * (iManufacturer, iProduct, iSerialNumber) normally used to provide user @@ -266,15 +267,12 @@ int usb_add_config(struct usb_composite_dev *, */ struct usb_composite_driver { const char *name; + const char *iProduct; + const char *iManufacturer; const struct usb_device_descriptor *dev; struct usb_gadget_strings **strings; + unsigned needs_serial:1; - /* REVISIT: bind() functions can be marked __init, which - * makes trouble for section mismatch analysis. See if - * we can't restructure things to avoid mismatching... - */ - - int (*bind)(struct usb_composite_dev *); int (*unbind)(struct usb_composite_dev *); void (*disconnect)(struct usb_composite_dev *); @@ -284,8 +282,9 @@ struct usb_composite_driver { void (*resume)(struct usb_composite_dev *); }; -extern int usb_composite_register(struct usb_composite_driver *); -extern void usb_composite_unregister(struct usb_composite_driver *); +extern int usb_composite_probe(struct usb_composite_driver *driver, + int (*bind)(struct usb_composite_dev *cdev)); +extern void usb_composite_unregister(struct usb_composite_driver *driver); /** @@ -334,6 +333,9 @@ struct usb_composite_dev { struct list_head configs; struct usb_composite_driver *driver; u8 next_string_id; + u8 manufacturer_override; + u8 product_override; + u8 serial_override; /* the gadget driver won't enable the data pullup * while the deactivation count is nonzero. 
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index d3ef42d7d2f0..006412ce2303 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -705,11 +705,6 @@ static inline int usb_gadget_disconnect(struct usb_gadget *gadget) * struct usb_gadget_driver - driver for usb 'slave' devices * @function: String describing the gadget's function * @speed: Highest speed the driver handles. - * @bind: Invoked when the driver is bound to a gadget, usually - * after registering the driver. - * At that point, ep0 is fully initialized, and ep_list holds - * the currently-available endpoints. - * Called in a context that permits sleeping. * @setup: Invoked for ep0 control requests that aren't handled by * the hardware level driver. Most calls must be handled by * the gadget driver, including descriptor and configuration @@ -774,7 +769,6 @@ static inline int usb_gadget_disconnect(struct usb_gadget *gadget) struct usb_gadget_driver { char *function; enum usb_device_speed speed; - int (*bind)(struct usb_gadget *); void (*unbind)(struct usb_gadget *); int (*setup)(struct usb_gadget *, const struct usb_ctrlrequest *); @@ -798,17 +792,19 @@ struct usb_gadget_driver { */ /** - * usb_gadget_register_driver - register a gadget driver - * @driver:the driver being registered + * usb_gadget_probe_driver - probe a gadget driver + * @driver: the driver being registered + * @bind: the driver's bind callback * Context: can sleep * * Call this in your gadget driver's module initialization function, * to tell the underlying usb controller driver about your driver. - * The driver's bind() function will be called to bind it to a - * gadget before this registration call returns. It's expected that - * the bind() functions will be in init sections. + * The @bind() function will be called to bind it to a gadget before this + * registration call returns. It's expected that the @bind() function will + * be in init sections. 
*/ -int usb_gadget_register_driver(struct usb_gadget_driver *driver); +int usb_gadget_probe_driver(struct usb_gadget_driver *driver, + int (*bind)(struct usb_gadget *)); /** * usb_gadget_unregister_driver - unregister a gadget driver diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 3b571f1ffbb3..0b6e751ea0b1 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -329,6 +329,8 @@ extern int usb_hcd_submit_urb(struct urb *urb, gfp_t mem_flags); extern int usb_hcd_unlink_urb(struct urb *urb, int status); extern void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status); +extern void unmap_urb_setup_for_dma(struct usb_hcd *, struct urb *); +extern void unmap_urb_for_dma(struct usb_hcd *, struct urb *); extern void usb_hcd_flush_endpoint(struct usb_device *udev, struct usb_host_endpoint *ep); extern void usb_hcd_disable_endpoint(struct usb_device *udev, diff --git a/include/linux/usb/intel_mid_otg.h b/include/linux/usb/intel_mid_otg.h new file mode 100644 index 000000000000..a0ccf795f362 --- /dev/null +++ b/include/linux/usb/intel_mid_otg.h @@ -0,0 +1,180 @@ +/* + * Intel MID (Langwell/Penwell) USB OTG Transceiver driver + * Copyright (C) 2008 - 2010, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + */ + +#ifndef __INTEL_MID_OTG_H +#define __INTEL_MID_OTG_H + +#include <linux/pm.h> +#include <linux/usb/otg.h> +#include <linux/notifier.h> + +struct intel_mid_otg_xceiv; + +/* This is a common data structure for Intel MID platform to + * save values of the OTG state machine */ +struct otg_hsm { + /* Input */ + int a_bus_resume; + int a_bus_suspend; + int a_conn; + int a_sess_vld; + int a_srp_det; + int a_vbus_vld; + int b_bus_resume; + int b_bus_suspend; + int b_conn; + int b_se0_srp; + int b_ssend_srp; + int b_sess_end; + int b_sess_vld; + int id; +/* id values */ +#define ID_B 0x05 +#define ID_A 0x04 +#define ID_ACA_C 0x03 +#define ID_ACA_B 0x02 +#define ID_ACA_A 0x01 + int power_up; + int adp_change; + int test_device; + + /* Internal variables */ + int a_set_b_hnp_en; + int b_srp_done; + int b_hnp_enable; + int hnp_poll_enable; + + /* Timeout indicator for timers */ + int a_wait_vrise_tmout; + int a_wait_bcon_tmout; + int a_aidl_bdis_tmout; + int a_bidl_adis_tmout; + int a_bidl_adis_tmr; + int a_wait_vfall_tmout; + int b_ase0_brst_tmout; + int b_bus_suspend_tmout; + int b_srp_init_tmout; + int b_srp_fail_tmout; + int b_srp_fail_tmr; + int b_adp_sense_tmout; + + /* Informative variables */ + int a_bus_drop; + int a_bus_req; + int a_clr_err; + int b_bus_req; + int a_suspend_req; + int b_bus_suspend_vld; + + /* Output */ + int drv_vbus; + int loc_conn; + int loc_sof; + + /* Others */ + int vbus_srp_up; +}; + +/* must provide ULPI access function to read/write registers implemented in + * ULPI address space */ +struct iotg_ulpi_access_ops { + int (*read)(struct intel_mid_otg_xceiv *iotg, u8 reg, u8 *val); + int (*write)(struct intel_mid_otg_xceiv *iotg, u8 reg, u8 val); +}; + +#define OTG_A_DEVICE 0x0 +#define OTG_B_DEVICE 0x1 + +/* + * the Intel MID (Langwell/Penwell) otg transceiver driver needs to interact + * with device and host drivers to implement the USB OTG related feature. 
More + * function members are added based on otg_transceiver data structure for this + * purpose. + */ +struct intel_mid_otg_xceiv { + struct otg_transceiver otg; + struct otg_hsm hsm; + + /* base address */ + void __iomem *base; + + /* ops to access ulpi */ + struct iotg_ulpi_access_ops ulpi_ops; + + /* atomic notifier for interrupt context */ + struct atomic_notifier_head iotg_notifier; + + /* start/stop USB Host function */ + int (*start_host)(struct intel_mid_otg_xceiv *iotg); + int (*stop_host)(struct intel_mid_otg_xceiv *iotg); + + /* start/stop USB Peripheral function */ + int (*start_peripheral)(struct intel_mid_otg_xceiv *iotg); + int (*stop_peripheral)(struct intel_mid_otg_xceiv *iotg); + + /* start/stop ADP sense/probe function */ + int (*set_adp_probe)(struct intel_mid_otg_xceiv *iotg, + bool enabled, int dev); + int (*set_adp_sense)(struct intel_mid_otg_xceiv *iotg, + bool enabled); + +#ifdef CONFIG_PM + /* suspend/resume USB host function */ + int (*suspend_host)(struct intel_mid_otg_xceiv *iotg, + pm_message_t message); + int (*resume_host)(struct intel_mid_otg_xceiv *iotg); + + int (*suspend_peripheral)(struct intel_mid_otg_xceiv *iotg, + pm_message_t message); + int (*resume_peripheral)(struct intel_mid_otg_xceiv *iotg); +#endif + +}; +static inline +struct intel_mid_otg_xceiv *otg_to_mid_xceiv(struct otg_transceiver *otg) +{ + return container_of(otg, struct intel_mid_otg_xceiv, otg); +} + +#define MID_OTG_NOTIFY_CONNECT 0x0001 +#define MID_OTG_NOTIFY_DISCONN 0x0002 +#define MID_OTG_NOTIFY_HSUSPEND 0x0003 +#define MID_OTG_NOTIFY_HRESUME 0x0004 +#define MID_OTG_NOTIFY_CSUSPEND 0x0005 +#define MID_OTG_NOTIFY_CRESUME 0x0006 +#define MID_OTG_NOTIFY_HOSTADD 0x0007 +#define MID_OTG_NOTIFY_HOSTREMOVE 0x0008 +#define MID_OTG_NOTIFY_CLIENTADD 0x0009 +#define MID_OTG_NOTIFY_CLIENTREMOVE 0x000a + +static inline int +intel_mid_otg_register_notifier(struct intel_mid_otg_xceiv *iotg, + struct notifier_block *nb) +{ + return 
atomic_notifier_chain_register(&iotg->iotg_notifier, nb); +} + +static inline void +intel_mid_otg_unregister_notifier(struct intel_mid_otg_xceiv *iotg, + struct notifier_block *nb) +{ + atomic_notifier_chain_unregister(&iotg->iotg_notifier, nb); +} + +#endif /* __INTEL_MID_OTG_H */ diff --git a/include/linux/usb/langwell_otg.h b/include/linux/usb/langwell_otg.h new file mode 100644 index 000000000000..51f17b16d312 --- /dev/null +++ b/include/linux/usb/langwell_otg.h @@ -0,0 +1,139 @@ +/* + * Intel Langwell USB OTG transceiver driver + * Copyright (C) 2008 - 2010, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + */ + +#ifndef __LANGWELL_OTG_H +#define __LANGWELL_OTG_H + +#include <linux/usb/intel_mid_otg.h> + +#define CI_USBCMD 0x30 +# define USBCMD_RST BIT(1) +# define USBCMD_RS BIT(0) +#define CI_USBSTS 0x34 +# define USBSTS_SLI BIT(8) +# define USBSTS_URI BIT(6) +# define USBSTS_PCI BIT(2) +#define CI_PORTSC1 0x74 +# define PORTSC_PP BIT(12) +# define PORTSC_LS (BIT(11) | BIT(10)) +# define PORTSC_SUSP BIT(7) +# define PORTSC_CCS BIT(0) +#define CI_HOSTPC1 0xb4 +# define HOSTPC1_PHCD BIT(22) +#define CI_OTGSC 0xf4 +# define OTGSC_DPIE BIT(30) +# define OTGSC_1MSE BIT(29) +# define OTGSC_BSEIE BIT(28) +# define OTGSC_BSVIE BIT(27) +# define OTGSC_ASVIE BIT(26) +# define OTGSC_AVVIE BIT(25) +# define OTGSC_IDIE BIT(24) +# define OTGSC_DPIS BIT(22) +# define OTGSC_1MSS BIT(21) +# define OTGSC_BSEIS BIT(20) +# define OTGSC_BSVIS BIT(19) +# define OTGSC_ASVIS BIT(18) +# define OTGSC_AVVIS BIT(17) +# define OTGSC_IDIS BIT(16) +# define OTGSC_DPS BIT(14) +# define OTGSC_1MST BIT(13) +# define OTGSC_BSE BIT(12) +# define OTGSC_BSV BIT(11) +# define OTGSC_ASV BIT(10) +# define OTGSC_AVV BIT(9) +# define OTGSC_ID BIT(8) +# define OTGSC_HABA BIT(7) +# define OTGSC_HADP BIT(6) +# define OTGSC_IDPU BIT(5) +# define OTGSC_DP BIT(4) +# define OTGSC_OT BIT(3) +# define OTGSC_HAAR BIT(2) +# define OTGSC_VC BIT(1) +# define OTGSC_VD BIT(0) +# define OTGSC_INTEN_MASK (0x7f << 24) +# define OTGSC_INT_MASK (0x5f << 24) +# define OTGSC_INTSTS_MASK (0x7f << 16) +#define CI_USBMODE 0xf8 +# define USBMODE_CM (BIT(1) | BIT(0)) +# define USBMODE_IDLE 0 +# define USBMODE_DEVICE 0x2 +# define USBMODE_HOST 0x3 +#define USBCFG_ADDR 0xff10801c +#define USBCFG_LEN 4 +# define USBCFG_VBUSVAL BIT(14) +# define USBCFG_AVALID BIT(13) +# define USBCFG_BVALID BIT(12) +# define USBCFG_SESEND BIT(11) + +#define INTR_DUMMY_MASK (USBSTS_SLI | USBSTS_URI | USBSTS_PCI) + +enum langwell_otg_timer_type { + TA_WAIT_VRISE_TMR, + TA_WAIT_BCON_TMR, + TA_AIDL_BDIS_TMR, + TB_ASE0_BRST_TMR, + TB_SE0_SRP_TMR, + 
TB_SRP_INIT_TMR, + TB_SRP_FAIL_TMR, + TB_BUS_SUSPEND_TMR +}; + +#define TA_WAIT_VRISE 100 +#define TA_WAIT_BCON 30000 +#define TA_AIDL_BDIS 15000 +#define TB_ASE0_BRST 5000 +#define TB_SE0_SRP 2 +#define TB_SRP_INIT 100 +#define TB_SRP_FAIL 5500 +#define TB_BUS_SUSPEND 500 + +struct langwell_otg_timer { + unsigned long expires; /* Number of count increase to timeout */ + unsigned long count; /* Tick counter */ + void (*function)(unsigned long); /* Timeout function */ + unsigned long data; /* Data passed to function */ + struct list_head list; +}; + +struct langwell_otg { + struct intel_mid_otg_xceiv iotg; + struct device *dev; + + void __iomem *usbcfg; /* SCCBUSB config Reg */ + + unsigned region; + unsigned cfg_region; + + struct work_struct work; + struct workqueue_struct *qwork; + struct timer_list hsm_timer; + + spinlock_t lock; + spinlock_t wq_lock; + + struct notifier_block iotg_notifier; +}; + +static inline +struct langwell_otg *mid_xceiv_to_lnw(struct intel_mid_otg_xceiv *iotg) +{ + return container_of(iotg, struct langwell_otg, iotg); +} + +#endif /* __LANGWELL_OTG_H__ */ diff --git a/include/linux/usb/ncm.h b/include/linux/usb/ncm.h deleted file mode 100644 index 006d1064c8b2..000000000000 --- a/include/linux/usb/ncm.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * USB CDC NCM auxiliary definitions - */ - -#ifndef __LINUX_USB_NCM_H -#define __LINUX_USB_NCM_H - -#include <linux/types.h> -#include <linux/usb/cdc.h> -#include <asm/unaligned.h> - -#define NCM_NTB_MIN_IN_SIZE 2048 -#define NCM_NTB_MIN_OUT_SIZE 2048 - -#define NCM_CONTROL_TIMEOUT (5 * 1000) - -/* bmNetworkCapabilities */ - -#define NCM_NCAP_ETH_FILTER (1 << 0) -#define NCM_NCAP_NET_ADDRESS (1 << 1) -#define NCM_NCAP_ENCAP_COMM (1 << 2) -#define NCM_NCAP_MAX_DGRAM (1 << 3) -#define NCM_NCAP_CRC_MODE (1 << 4) - -/* - * Here are options for NCM Datagram Pointer table (NDP) parser. - * There are 2 different formats: NDP16 and NDP32 in the spec (ch. 
3), - * in NDP16 offsets and sizes fields are 1 16bit word wide, - * in NDP32 -- 2 16bit words wide. Also signatures are different. - * To make the parser code the same, put the differences in the structure, - * and switch pointers to the structures when the format is changed. - */ - -struct ndp_parser_opts { - u32 nth_sign; - u32 ndp_sign; - unsigned nth_size; - unsigned ndp_size; - unsigned ndplen_align; - /* sizes in u16 units */ - unsigned dgram_item_len; /* index or length */ - unsigned block_length; - unsigned fp_index; - unsigned reserved1; - unsigned reserved2; - unsigned next_fp_index; -}; - -#define INIT_NDP16_OPTS { \ - .nth_sign = NCM_NTH16_SIGN, \ - .ndp_sign = NCM_NDP16_NOCRC_SIGN, \ - .nth_size = sizeof(struct usb_cdc_ncm_nth16), \ - .ndp_size = sizeof(struct usb_cdc_ncm_ndp16), \ - .ndplen_align = 4, \ - .dgram_item_len = 1, \ - .block_length = 1, \ - .fp_index = 1, \ - .reserved1 = 0, \ - .reserved2 = 0, \ - .next_fp_index = 1, \ - } - - -#define INIT_NDP32_OPTS { \ - .nth_sign = NCM_NTH32_SIGN, \ - .ndp_sign = NCM_NDP32_NOCRC_SIGN, \ - .nth_size = sizeof(struct usb_cdc_ncm_nth32), \ - .ndp_size = sizeof(struct usb_cdc_ncm_ndp32), \ - .ndplen_align = 8, \ - .dgram_item_len = 2, \ - .block_length = 2, \ - .fp_index = 2, \ - .reserved1 = 1, \ - .reserved2 = 2, \ - .next_fp_index = 2, \ - } - -static inline void put_ncm(__le16 **p, unsigned size, unsigned val) -{ - switch (size) { - case 1: - put_unaligned_le16((u16)val, *p); - break; - case 2: - put_unaligned_le32((u32)val, *p); - - break; - default: - BUG(); - } - - *p += size; -} - -static inline unsigned get_ncm(__le16 **p, unsigned size) -{ - unsigned tmp; - - switch (size) { - case 1: - tmp = get_unaligned_le16(*p); - break; - case 2: - tmp = get_unaligned_le32(*p); - break; - default: - BUG(); - } - - *p += size; - return tmp; -} - -#endif /* __LINUX_USB_NCM_H */ diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h index 545cba73ccaf..0a5b3711e502 100644 --- a/include/linux/usb/otg.h 
+++ b/include/linux/usb/otg.h @@ -164,8 +164,19 @@ otg_shutdown(struct otg_transceiver *otg) } /* for usb host and peripheral controller drivers */ +#ifdef CONFIG_USB_OTG_UTILS extern struct otg_transceiver *otg_get_transceiver(void); extern void otg_put_transceiver(struct otg_transceiver *); +#else +static inline struct otg_transceiver *otg_get_transceiver(void) +{ + return NULL; +} + +static inline void otg_put_transceiver(struct otg_transceiver *x) +{ +} +#endif /* Context: can sleep */ static inline int diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 55675b1efb28..16d682f4f7c3 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -271,6 +271,8 @@ struct usb_serial_driver { int (*tiocmget)(struct tty_struct *tty, struct file *file); int (*tiocmset)(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear); + int (*get_icount)(struct tty_struct *tty, + struct serial_icounter_struct *icount); /* Called by the tty layer for port level work. There may or may not be an attached tty at this point */ void (*dtr_rts)(struct usb_serial_port *port, int on); diff --git a/include/linux/usb/storage.h b/include/linux/usb/storage.h new file mode 100644 index 000000000000..d7fc910f1dc4 --- /dev/null +++ b/include/linux/usb/storage.h @@ -0,0 +1,48 @@ +#ifndef __LINUX_USB_STORAGE_H +#define __LINUX_USB_STORAGE_H + +/* + * linux/usb/storage.h + * + * Copyright Matthew Wilcox for Intel Corp, 2010 + * + * This file contains definitions taken from the + * USB Mass Storage Class Specification Overview + * + * Distributed under the terms of the GNU GPL, version two. 
+ */ + +/* Storage subclass codes */ + +#define USB_SC_RBC 0x01 /* Typically, flash devices */ +#define USB_SC_8020 0x02 /* CD-ROM */ +#define USB_SC_QIC 0x03 /* QIC-157 Tapes */ +#define USB_SC_UFI 0x04 /* Floppy */ +#define USB_SC_8070 0x05 /* Removable media */ +#define USB_SC_SCSI 0x06 /* Transparent */ +#define USB_SC_LOCKABLE 0x07 /* Password-protected */ + +#define USB_SC_ISD200 0xf0 /* ISD200 ATA */ +#define USB_SC_CYP_ATACB 0xf1 /* Cypress ATACB */ +#define USB_SC_DEVICE 0xff /* Use device's value */ + +/* Storage protocol codes */ + +#define USB_PR_CBI 0x00 /* Control/Bulk/Interrupt */ +#define USB_PR_CB 0x01 /* Control/Bulk w/o interrupt */ +#define USB_PR_BULK 0x50 /* bulk only */ +#define USB_PR_UAS 0x62 /* USB Attached SCSI */ + +#define USB_PR_USBAT 0x80 /* SCM-ATAPI bridge */ +#define USB_PR_EUSB_SDDR09 0x81 /* SCM-SCSI bridge for SDDR-09 */ +#define USB_PR_SDDR55 0x82 /* SDDR-55 (made up) */ +#define USB_PR_DPCM_USB 0xf0 /* Combination CB/SDDR09 */ +#define USB_PR_FREECOM 0xf1 /* Freecom */ +#define USB_PR_DATAFAB 0xf2 /* Datafab chipsets */ +#define USB_PR_JUMPSHOT 0xf3 /* Lexar Jumpshot */ +#define USB_PR_ALAUDA 0xf4 /* Alauda chipsets */ +#define USB_PR_KARMA 0xf5 /* Rio Karma */ + +#define USB_PR_DEVICE 0xff /* Use device's value */ + +#endif diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index a4b947e470a5..71693d4a4fe1 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -58,7 +58,11 @@ US_FLAG(CAPACITY_OK, 0x00010000) \ /* READ CAPACITY response is correct */ \ US_FLAG(BAD_SENSE, 0x00020000) \ - /* Bad Sense (never more than 18 bytes) */ + /* Bad Sense (never more than 18 bytes) */ \ + US_FLAG(NO_READ_DISC_INFO, 0x00040000) \ + /* cannot handle READ_DISC_INFO */ \ + US_FLAG(NO_READ_CAPACITY_16, 0x00080000) \ + /* cannot handle READ_CAPACITY_16 */ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; @@ -74,42 +78,7 @@ enum { US_DO_ALL_FLAGS }; #define USB_US_TYPE(flags) 
(((flags) >> 24) & 0xFF) #define USB_US_ORIG_FLAGS(flags) ((flags) & 0x00FFFFFF) -/* - * This is probably not the best place to keep these constants, conceptually. - * But it's the only header included into all places which need them. - */ - -/* Sub Classes */ - -#define US_SC_RBC 0x01 /* Typically, flash devices */ -#define US_SC_8020 0x02 /* CD-ROM */ -#define US_SC_QIC 0x03 /* QIC-157 Tapes */ -#define US_SC_UFI 0x04 /* Floppy */ -#define US_SC_8070 0x05 /* Removable media */ -#define US_SC_SCSI 0x06 /* Transparent */ -#define US_SC_LOCKABLE 0x07 /* Password-protected */ - -#define US_SC_ISD200 0xf0 /* ISD200 ATA */ -#define US_SC_CYP_ATACB 0xf1 /* Cypress ATACB */ -#define US_SC_DEVICE 0xff /* Use device's value */ - -/* Protocols */ - -#define US_PR_CBI 0x00 /* Control/Bulk/Interrupt */ -#define US_PR_CB 0x01 /* Control/Bulk w/o interrupt */ -#define US_PR_BULK 0x50 /* bulk only */ - -#define US_PR_USBAT 0x80 /* SCM-ATAPI bridge */ -#define US_PR_EUSB_SDDR09 0x81 /* SCM-SCSI bridge for SDDR-09 */ -#define US_PR_SDDR55 0x82 /* SDDR-55 (made up) */ -#define US_PR_DPCM_USB 0xf0 /* Combination CB/SDDR09 */ -#define US_PR_FREECOM 0xf1 /* Freecom */ -#define US_PR_DATAFAB 0xf2 /* Datafab chipsets */ -#define US_PR_JUMPSHOT 0xf3 /* Lexar Jumpshot */ -#define US_PR_ALAUDA 0xf4 /* Alauda chipsets */ -#define US_PR_KARMA 0xf5 /* Rio Karma */ - -#define US_PR_DEVICE 0xff /* Use device's value */ +#include <linux/usb/storage.h> /* */ diff --git a/include/linux/wait.h b/include/linux/wait.h index 0836ccc57121..3efc9f3f43a0 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -614,6 +614,7 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); (wait)->private = current; \ (wait)->func = autoremove_wake_function; \ INIT_LIST_HEAD(&(wait)->task_list); \ + (wait)->flags = 0; \ } while (0) /** diff --git a/include/linux/wireless.h b/include/linux/wireless.h index e6827eedf18b..4395b28bb86c 100644 --- a/include/linux/wireless.h +++ 
b/include/linux/wireless.h @@ -1157,6 +1157,6 @@ struct __compat_iw_event { #define IW_EV_PARAM_PK_LEN (IW_EV_LCP_PK_LEN + sizeof(struct iw_param)) #define IW_EV_ADDR_PK_LEN (IW_EV_LCP_PK_LEN + sizeof(struct sockaddr)) #define IW_EV_QUAL_PK_LEN (IW_EV_LCP_PK_LEN + sizeof(struct iw_quality)) -#define IW_EV_POINT_PK_LEN (IW_EV_LCP_LEN + 4) +#define IW_EV_POINT_PK_LEN (IW_EV_LCP_PK_LEN + 4) #endif /* _LINUX_WIRELESS_H */ diff --git a/include/linux/spi/wl12xx.h b/include/linux/wl12xx.h index a223ecbc71ef..4f902e1908aa 100644 --- a/include/linux/spi/wl12xx.h +++ b/include/linux/wl12xx.h @@ -3,7 +3,7 @@ * * Copyright (C) 2009 Nokia Corporation * - * Contact: Kalle Valo <kalle.valo@nokia.com> + * Contact: Luciano Coelho <luciano.coelho@nokia.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -21,14 +21,31 @@ * */ -#ifndef _LINUX_SPI_WL12XX_H -#define _LINUX_SPI_WL12XX_H +#ifndef _LINUX_WL12XX_H +#define _LINUX_WL12XX_H struct wl12xx_platform_data { void (*set_power)(bool enable); /* SDIO only: IRQ number if WLAN_IRQ line is used, 0 for SDIO IRQs */ int irq; bool use_eeprom; + int board_ref_clock; }; +#ifdef CONFIG_WL12XX_PLATFORM_DATA + +int wl12xx_set_platform_data(const struct wl12xx_platform_data *data); + +#else + +static inline +int wl12xx_set_platform_data(const struct wl12xx_platform_data *data) +{ + return -ENOSYS; +} + +#endif + +const struct wl12xx_platform_data *wl12xx_get_platform_data(void); + #endif diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 25e02c941bac..070bb7a88936 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -243,11 +243,12 @@ enum { WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ WQ_FREEZEABLE = 1 << 2, /* freeze during suspend */ - WQ_RESCUER = 1 << 3, /* has an rescue worker */ + WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ WQ_HIGHPRI = 
1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ WQ_DYING = 1 << 6, /* internal: workqueue is dying */ + WQ_RESCUER = 1 << 7, /* internal: workqueue has rescuer */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ @@ -306,12 +307,30 @@ __alloc_workqueue_key(const char *name, unsigned int flags, int max_active, __alloc_workqueue_key((name), (flags), (max_active), NULL, NULL) #endif +/** + * alloc_ordered_workqueue - allocate an ordered workqueue + * @name: name of the workqueue + * @flags: WQ_* flags (only WQ_FREEZEABLE and WQ_MEM_RECLAIM are meaningful) + * + * Allocate an ordered workqueue. An ordered workqueue executes at + * most one work item at any given time in the queued order. They are + * implemented as unbound workqueues with @max_active of one. + * + * RETURNS: + * Pointer to the allocated workqueue on success, %NULL on failure. + */ +static inline struct workqueue_struct * +alloc_ordered_workqueue(const char *name, unsigned int flags) +{ + return alloc_workqueue(name, WQ_UNBOUND | flags, 1); +} + #define create_workqueue(name) \ - alloc_workqueue((name), WQ_RESCUER, 1) + alloc_workqueue((name), WQ_MEM_RECLAIM, 1) #define create_freezeable_workqueue(name) \ - alloc_workqueue((name), WQ_FREEZEABLE | WQ_UNBOUND | WQ_RESCUER, 1) + alloc_workqueue((name), WQ_FREEZEABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, 1) #define create_singlethread_workqueue(name) \ - alloc_workqueue((name), WQ_UNBOUND | WQ_RESCUER, 1) + alloc_workqueue((name), WQ_UNBOUND | WQ_MEM_RECLAIM, 1) extern void destroy_workqueue(struct workqueue_struct *wq); @@ -325,7 +344,6 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, extern void flush_workqueue(struct workqueue_struct *wq); extern void flush_scheduled_work(void); -extern void flush_delayed_work(struct delayed_work *work); extern int schedule_work(struct work_struct *work); extern int schedule_work_on(int cpu, struct 
work_struct *work); @@ -337,8 +355,13 @@ extern int keventd_up(void); int execute_in_process_context(work_func_t fn, struct execute_work *); -extern int flush_work(struct work_struct *work); -extern int cancel_work_sync(struct work_struct *work); +extern bool flush_work(struct work_struct *work); +extern bool flush_work_sync(struct work_struct *work); +extern bool cancel_work_sync(struct work_struct *work); + +extern bool flush_delayed_work(struct delayed_work *dwork); +extern bool flush_delayed_work_sync(struct delayed_work *work); +extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); @@ -352,9 +375,9 @@ extern unsigned int work_busy(struct work_struct *work); * it returns 1 and the work doesn't re-arm itself. Run flush_workqueue() or * cancel_work_sync() to wait on it. */ -static inline int cancel_delayed_work(struct delayed_work *work) +static inline bool cancel_delayed_work(struct delayed_work *work) { - int ret; + bool ret; ret = del_timer_sync(&work->timer); if (ret) @@ -367,9 +390,9 @@ static inline int cancel_delayed_work(struct delayed_work *work) * if it returns 0 the timer function may be running and the queueing is in * progress. */ -static inline int __cancel_delayed_work(struct delayed_work *work) +static inline bool __cancel_delayed_work(struct delayed_work *work) { - int ret; + bool ret; ret = del_timer(&work->timer); if (ret) @@ -377,8 +400,6 @@ static inline int __cancel_delayed_work(struct delayed_work *work) return ret; } -extern int cancel_delayed_work_sync(struct delayed_work *work); - /* Obsolete. 
use cancel_delayed_work_sync() */ static inline void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, @@ -409,8 +430,4 @@ extern bool freeze_workqueues_busy(void); extern void thaw_workqueues(void); #endif /* CONFIG_FREEZER */ -#ifdef CONFIG_LOCKDEP -int in_workqueue_context(struct workqueue_struct *wq); -#endif - #endif diff --git a/include/media/videobuf-dma-sg.h b/include/media/videobuf-dma-sg.h index 97e07f46a0fa..aa4ebb42a565 100644 --- a/include/media/videobuf-dma-sg.h +++ b/include/media/videobuf-dma-sg.h @@ -48,6 +48,7 @@ struct videobuf_dmabuf { /* for userland buffer */ int offset; + size_t size; struct page **pages; /* for kernel buffers */ diff --git a/include/net/9p/client.h b/include/net/9p/client.h index d1aa2cfb30f0..7f63d5ab7b44 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -212,15 +212,12 @@ struct p9_dirent { int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb); int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name); -int p9_client_version(struct p9_client *); struct p9_client *p9_client_create(const char *dev_name, char *options); void p9_client_destroy(struct p9_client *clnt); void p9_client_disconnect(struct p9_client *clnt); void p9_client_begin_disconnect(struct p9_client *clnt); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, char *uname, u32 n_uname, char *aname); -struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname, - u32 n_uname, char *aname); struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, int clone); int p9_client_open(struct p9_fid *fid, int mode); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 4d40c4d0230b..a9441249306c 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -175,20 +175,32 @@ extern int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, extern int register_inet6addr_notifier(struct notifier_block *nb); extern int 
unregister_inet6addr_notifier(struct notifier_block *nb); -static inline struct inet6_dev * -__in6_dev_get(struct net_device *dev) +/** + * __in6_dev_get - get inet6_dev pointer from netdevice + * @dev: network device + * + * Caller must hold rcu_read_lock or RTNL, because this function + * does not take a reference on the inet6_dev. + */ +static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev) { - return rcu_dereference_check(dev->ip6_ptr, - rcu_read_lock_held() || - lockdep_rtnl_is_held()); + return rcu_dereference_rtnl(dev->ip6_ptr); } -static inline struct inet6_dev * -in6_dev_get(struct net_device *dev) +/** + * in6_dev_get - get inet6_dev pointer from netdevice + * @dev: network device + * + * This version can be used in any context, and takes a reference + * on the inet6_dev. Callers must use in6_dev_put() later to + * release this reference. + */ +static inline struct inet6_dev *in6_dev_get(const struct net_device *dev) { - struct inet6_dev *idev = NULL; + struct inet6_dev *idev; + rcu_read_lock(); - idev = __in6_dev_get(dev); + idev = rcu_dereference(dev->ip6_ptr); if (idev) atomic_inc(&idev->refcnt); rcu_read_unlock(); @@ -197,16 +209,21 @@ in6_dev_get(struct net_device *dev) extern void in6_dev_finish_destroy(struct inet6_dev *idev); -static inline void -in6_dev_put(struct inet6_dev *idev) +static inline void in6_dev_put(struct inet6_dev *idev) { if (atomic_dec_and_test(&idev->refcnt)) in6_dev_finish_destroy(idev); } -#define __in6_dev_put(idev) atomic_dec(&(idev)->refcnt) -#define in6_dev_hold(idev) atomic_inc(&(idev)->refcnt) +static inline void __in6_dev_put(struct inet6_dev *idev) +{ + atomic_dec(&idev->refcnt); +} +static inline void in6_dev_hold(struct inet6_dev *idev) +{ + atomic_inc(&idev->refcnt); +} extern void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); @@ -216,9 +233,15 @@ static inline void in6_ifa_put(struct inet6_ifaddr *ifp) inet6_ifa_finish_destroy(ifp); } -#define __in6_ifa_put(ifp) atomic_dec(&(ifp)->refcnt) 
-#define in6_ifa_hold(ifp) atomic_inc(&(ifp)->refcnt) +static inline void __in6_ifa_put(struct inet6_ifaddr *ifp) +{ + atomic_dec(&ifp->refcnt); +} +static inline void in6_ifa_hold(struct inet6_ifaddr *ifp) +{ + atomic_inc(&ifp->refcnt); +} /* @@ -241,23 +264,21 @@ static inline int ipv6_addr_is_multicast(const struct in6_addr *addr) static inline int ipv6_addr_is_ll_all_nodes(const struct in6_addr *addr) { - return (((addr->s6_addr32[0] ^ htonl(0xff020000)) | + return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | addr->s6_addr32[2] | - (addr->s6_addr32[3] ^ htonl(0x00000001))) == 0); + (addr->s6_addr32[3] ^ htonl(0x00000001))) == 0; } static inline int ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) { - return (((addr->s6_addr32[0] ^ htonl(0xff020000)) | + return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | addr->s6_addr32[2] | - (addr->s6_addr32[3] ^ htonl(0x00000002))) == 0); + (addr->s6_addr32[3] ^ htonl(0x00000002))) == 0; } -extern int __ipv6_isatap_ifid(u8 *eui, __be32 addr); - static inline int ipv6_addr_is_isatap(const struct in6_addr *addr) { - return ((addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE)); + return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE); } #ifdef CONFIG_PROC_FS diff --git a/include/net/arp.h b/include/net/arp.h index 716f43c5c98e..f4cf6ce66586 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -26,6 +26,4 @@ extern struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, const unsigned char *target_hw); extern void arp_xmit(struct sk_buff *skb); -extern const struct neigh_ops arp_broken_ops; - #endif /* _ARP_H */ diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 27a902d9b3a9..d81ea7997701 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -126,6 +126,8 @@ int bt_sock_unregister(int proto); void bt_sock_link(struct bt_sock_list *l, struct sock *s); void 
bt_sock_unlink(struct bt_sock_list *l, struct sock *s); int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags); +int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t len, int flags); uint bt_sock_poll(struct file * file, struct socket *sock, poll_table *wait); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); @@ -161,12 +163,30 @@ static inline struct sk_buff *bt_skb_send_alloc(struct sock *sk, unsigned long l { struct sk_buff *skb; + release_sock(sk); if ((skb = sock_alloc_send_skb(sk, len + BT_SKB_RESERVE, nb, err))) { skb_reserve(skb, BT_SKB_RESERVE); bt_cb(skb)->incoming = 0; } + lock_sock(sk); + + if (!skb && *err) + return NULL; + + *err = sock_error(sk); + if (*err) + goto out; + + if (sk->sk_shutdown) { + *err = -ECONNRESET; + goto out; + } return skb; + +out: + kfree_skb(skb); + return NULL; } int bt_err(__u16 code); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index bcbdd6d4e6dd..e30e00834340 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -54,7 +54,7 @@ /* HCI controller types */ #define HCI_BREDR 0x00 -#define HCI_80211 0x01 +#define HCI_AMP 0x01 /* HCI device quirks */ enum { diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 4568b938ca35..ebec8c9a929d 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -233,7 +233,7 @@ static inline void inquiry_cache_init(struct hci_dev *hdev) static inline int inquiry_cache_empty(struct hci_dev *hdev) { struct inquiry_cache *c = &hdev->inq_cache; - return (c->list == NULL); + return c->list == NULL; } static inline long inquiry_cache_age(struct hci_dev *hdev) diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 6c241444f902..c819c8bf9b68 100644 --- 
a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -414,7 +414,7 @@ static inline int l2cap_tx_window_full(struct sock *sk) if (sub < 0) sub += 64; - return (sub == pi->remote_tx_win); + return sub == pi->remote_tx_win; } #define __get_txseq(ctrl) ((ctrl) & L2CAP_CTRL_TXSEQ) >> 1 diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index a140847d622c..71047bc0af84 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -213,11 +213,6 @@ struct rfcomm_dlc { #define RFCOMM_DEFER_SETUP 8 /* Scheduling flags and events */ -#define RFCOMM_SCHED_STATE 0 -#define RFCOMM_SCHED_RX 1 -#define RFCOMM_SCHED_TX 2 -#define RFCOMM_SCHED_TIMEO 3 -#define RFCOMM_SCHED_AUTH 4 #define RFCOMM_SCHED_WAKEUP 31 /* MSC exchange flags */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2fd06c60ffbb..2a7936d7851d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -25,6 +25,43 @@ #include <linux/wireless.h> +/** + * DOC: Introduction + * + * cfg80211 is the configuration API for 802.11 devices in Linux. It bridges + * userspace and drivers, and offers some utility functionality associated + * with 802.11. cfg80211 must, directly or indirectly via mac80211, be used + * by all modern wireless drivers in Linux, so that they offer a consistent + * API through nl80211. For backward compatibility, cfg80211 also offers + * wireless extensions to userspace, but hides them from drivers completely. + * + * Additionally, cfg80211 contains code to help enforce regulatory spectrum + * use restrictions. + */ + + +/** + * DOC: Device registration + * + * In order for a driver to use cfg80211, it must register the hardware device + * with cfg80211. This happens through a number of hardware capability structs + * described below. + * + * The fundamental structure for each device is the 'wiphy', of which each + * instance describes a physical wireless device connected to the system. 
Each + * such wiphy can have zero, one, or many virtual interfaces associated with + * it, which need to be identified as such by pointing the network interface's + * @ieee80211_ptr pointer to a &struct wireless_dev which further describes + * the wireless part of the interface, normally this struct is embedded in the + * network interface's private data area. Drivers can optionally allow creating + * or destroying virtual interfaces on the fly, but without at least one or the + * ability to create some the wireless device isn't useful. + * + * Each wiphy structure contains device capability information, and also has + * a pointer to the various operations the driver offers. The definitions and + * structures here describe these capabilities in detail. + */ + /* * wireless hardware capability structures */ @@ -205,6 +242,21 @@ struct ieee80211_supported_band { */ /** + * DOC: Actions and configuration + * + * Each wireless device and each virtual interface offer a set of configuration + * operations and other actions that are invoked by userspace. Each of these + * actions is described in the operations structure, and the parameters these + * operations use are described separately. + * + * Additionally, some operations are asynchronous and expect to get status + * information via some functions that drivers need to call. + * + * Scanning and BSS list handling with its associated functionality is described + * in a separate chapter. 
+ */ + +/** * struct vif_params - describes virtual interface parameters * @mesh_id: mesh ID to use * @mesh_id_len: length of the mesh ID @@ -241,12 +293,24 @@ struct key_params { * enum survey_info_flags - survey information flags * * @SURVEY_INFO_NOISE_DBM: noise (in dBm) was filled in + * @SURVEY_INFO_IN_USE: channel is currently being used + * @SURVEY_INFO_CHANNEL_TIME: channel active time (in ms) was filled in + * @SURVEY_INFO_CHANNEL_TIME_BUSY: channel busy time was filled in + * @SURVEY_INFO_CHANNEL_TIME_EXT_BUSY: extension channel busy time was filled in + * @SURVEY_INFO_CHANNEL_TIME_RX: channel receive time was filled in + * @SURVEY_INFO_CHANNEL_TIME_TX: channel transmit time was filled in * * Used by the driver to indicate which info in &struct survey_info * it has filled in during the get_survey(). */ enum survey_info_flags { SURVEY_INFO_NOISE_DBM = 1<<0, + SURVEY_INFO_IN_USE = 1<<1, + SURVEY_INFO_CHANNEL_TIME = 1<<2, + SURVEY_INFO_CHANNEL_TIME_BUSY = 1<<3, + SURVEY_INFO_CHANNEL_TIME_EXT_BUSY = 1<<4, + SURVEY_INFO_CHANNEL_TIME_RX = 1<<5, + SURVEY_INFO_CHANNEL_TIME_TX = 1<<6, }; /** @@ -256,6 +320,11 @@ enum survey_info_flags { * @filled: bitflag of flags from &enum survey_info_flags * @noise: channel noise in dBm. This and all following fields are * optional + * @channel_time: amount of time in ms the radio spent on the channel + * @channel_time_busy: amount of time the primary channel was sensed busy + * @channel_time_ext_busy: amount of time the extension channel was sensed busy + * @channel_time_rx: amount of time the radio spent receiving data + * @channel_time_tx: amount of time the radio spent transmitting data * * Used by dump_survey() to report back per-channel survey information. 
* @@ -264,6 +333,11 @@ enum survey_info_flags { */ struct survey_info { struct ieee80211_channel *channel; + u64 channel_time; + u64 channel_time_busy; + u64 channel_time_ext_busy; + u64 channel_time_rx; + u64 channel_time_tx; u32 filled; s8 noise; }; @@ -347,6 +421,9 @@ struct station_parameters { * (tx_bitrate, tx_bitrate_flags and tx_bitrate_mcs) * @STATION_INFO_RX_PACKETS: @rx_packets filled * @STATION_INFO_TX_PACKETS: @tx_packets filled + * @STATION_INFO_TX_RETRIES: @tx_retries filled + * @STATION_INFO_TX_FAILED: @tx_failed filled + * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -359,6 +436,9 @@ enum station_info_flags { STATION_INFO_TX_BITRATE = 1<<7, STATION_INFO_RX_PACKETS = 1<<8, STATION_INFO_TX_PACKETS = 1<<9, + STATION_INFO_TX_RETRIES = 1<<10, + STATION_INFO_TX_FAILED = 1<<11, + STATION_INFO_RX_DROP_MISC = 1<<12, }; /** @@ -408,6 +488,9 @@ struct rate_info { * @txrate: current unicast bitrate to this station * @rx_packets: packets received from this station * @tx_packets: packets transmitted to this station + * @tx_retries: cumulative retry counts + * @tx_failed: number of failed transmissions (retries exceeded, no ACK) + * @rx_dropped_misc: Dropped for un-specified reason. * @generation: generation number for nl80211 dumps. * This number should increase every time the list of stations * changes, i.e. when a station is added or removed, so that @@ -425,6 +508,9 @@ struct station_info { struct rate_info txrate; u32 rx_packets; u32 tx_packets; + u32 tx_retries; + u32 tx_failed; + u32 rx_dropped_misc; int generation; }; @@ -570,8 +656,28 @@ struct ieee80211_txq_params { /* from net/wireless.h */ struct wiphy; -/* from net/ieee80211.h */ -struct ieee80211_channel; +/** + * DOC: Scanning and BSS list handling + * + * The scanning process itself is fairly simple, but cfg80211 offers quite + * a bit of helper functionality. 
To start a scan, the scan operation will + * be invoked with a scan definition. This scan definition contains the + * channels to scan, and the SSIDs to send probe requests for (including the + * wildcard, if desired). A passive scan is indicated by having no SSIDs to + * probe. Additionally, a scan request may contain extra information elements + * that should be added to the probe request. The IEs are guaranteed to be + * well-formed, and will not exceed the maximum length the driver advertised + * in the wiphy structure. + * + * When scanning finds a BSS, cfg80211 needs to be notified of that, because + * it is responsible for maintaining the BSS list; the driver should not + * maintain a list itself. For this notification, various functions exist. + * + * Since drivers do not maintain a BSS list, there are also a number of + * functions to search for a BSS and obtain information about it from the + * BSS structure cfg80211 maintains. The BSS list is also made available + * to userspace. + */ /** * struct cfg80211_ssid - SSID description @@ -691,6 +797,10 @@ const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie); * sets/clears %NL80211_STA_FLAG_AUTHORIZED. If true, the driver is * required to assume that the port is unauthorized until authorized by * user space. Otherwise, port is marked authorized by default. + * @control_port_ethertype: the control port protocol that should be + * allowed through even on unauthorized ports + * @control_port_no_encrypt: TRUE to prevent encryption of control port + * protocol frames. */ struct cfg80211_crypto_settings { u32 wpa_versions; @@ -700,6 +810,8 @@ struct cfg80211_crypto_settings { int n_akm_suites; u32 akm_suites[NL80211_MAX_NR_AKM_SUITES]; bool control_port; + __be16 control_port_ethertype; + bool control_port_no_encrypt; }; /** @@ -1020,7 +1132,7 @@ struct cfg80211_pmksa { * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation. 
* This allows the operation to be terminated prior to timeout based on * the duration value. - * @action: Transmit an action frame + * @mgmt_tx: Transmit a management frame * * @testmode_cmd: run a test mode command * @@ -1035,6 +1147,9 @@ struct cfg80211_pmksa { * allows the driver to adjust the dynamic ps timeout value. * @set_cqm_rssi_config: Configure connection quality monitor RSSI threshold. * + * @mgmt_frame_register: Notify driver that a management frame type was + * registered. Note that this callback may not sleep, and cannot run + * concurrently with itself. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy); @@ -1050,13 +1165,14 @@ struct cfg80211_ops { struct vif_params *params); int (*add_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, const u8 *mac_addr, + u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params); int (*get_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, const u8 *mac_addr, void *cookie, + u8 key_index, bool pairwise, const u8 *mac_addr, + void *cookie, void (*callback)(void *cookie, struct key_params*)); int (*del_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, const u8 *mac_addr); + u8 key_index, bool pairwise, const u8 *mac_addr); int (*set_default_key)(struct wiphy *wiphy, struct net_device *netdev, u8 key_index); @@ -1140,7 +1256,7 @@ struct cfg80211_ops { int (*get_tx_power)(struct wiphy *wiphy, int *dbm); int (*set_wds_peer)(struct wiphy *wiphy, struct net_device *dev, - u8 *addr); + const u8 *addr); void (*rfkill_poll)(struct wiphy *wiphy); @@ -1172,7 +1288,7 @@ struct cfg80211_ops { struct net_device *dev, u64 cookie); - int (*action)(struct wiphy *wiphy, struct net_device *dev, + int (*mgmt_tx)(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, bool channel_type_valid, @@ -1184,6 +1300,10 @@ struct cfg80211_ops { int (*set_cqm_rssi_config)(struct wiphy *wiphy, struct 
net_device *dev, s32 rssi_thold, u32 rssi_hyst); + + void (*mgmt_frame_register)(struct wiphy *wiphy, + struct net_device *dev, + u16 frame_type, bool reg); }; /* @@ -1221,21 +1341,31 @@ struct cfg80211_ops { * @WIPHY_FLAG_4ADDR_AP: supports 4addr mode even on AP (with a single station * on a VLAN interface) * @WIPHY_FLAG_4ADDR_STATION: supports 4addr mode even as a station + * @WIPHY_FLAG_CONTROL_PORT_PROTOCOL: This device supports setting the + * control port protocol ethertype. The device also honours the + * control_port_no_encrypt flag. + * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN. */ enum wiphy_flags { - WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), - WIPHY_FLAG_STRICT_REGULATORY = BIT(1), - WIPHY_FLAG_DISABLE_BEACON_HINTS = BIT(2), - WIPHY_FLAG_NETNS_OK = BIT(3), - WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), - WIPHY_FLAG_4ADDR_AP = BIT(5), - WIPHY_FLAG_4ADDR_STATION = BIT(6), + WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), + WIPHY_FLAG_STRICT_REGULATORY = BIT(1), + WIPHY_FLAG_DISABLE_BEACON_HINTS = BIT(2), + WIPHY_FLAG_NETNS_OK = BIT(3), + WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), + WIPHY_FLAG_4ADDR_AP = BIT(5), + WIPHY_FLAG_4ADDR_STATION = BIT(6), + WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), + WIPHY_FLAG_IBSS_RSN = BIT(7), }; struct mac_address { u8 addr[ETH_ALEN]; }; +struct ieee80211_txrx_stypes { + u16 tx, rx; +}; + /** * struct wiphy - wireless hardware description * @reg_notifier: the driver's regulatory notification callback @@ -1286,6 +1416,10 @@ struct mac_address { * @privid: a pointer that drivers can use to identify if an arbitrary * wiphy is theirs, e.g. 
in global notifiers * @bands: information about bands/channels supported by this device + * + * @mgmt_stypes: bitmasks of frame subtypes that can be subscribed to or + * transmitted through nl80211, points to an array indexed by interface + * type */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -1294,9 +1428,12 @@ struct wiphy { u8 perm_addr[ETH_ALEN]; u8 addr_mask[ETH_ALEN]; - u16 n_addresses; struct mac_address *addresses; + const struct ieee80211_txrx_stypes *mgmt_stypes; + + u16 n_addresses; + /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ u16 interface_modes; @@ -1492,8 +1629,8 @@ struct cfg80211_cached_keys; * set by driver (if supported) on add_interface BEFORE registering the * netdev and may otherwise be used by driver read-only, will be updated * by cfg80211 on change_interface - * @action_registrations: list of registrations for action frames - * @action_registrations_lock: lock for the list + * @mgmt_registrations: list of registrations for management frames + * @mgmt_registrations_lock: lock for the list * @mtx: mutex used to lock data in this struct * @cleanup_work: work struct used for cleanup that can't be done directly */ @@ -1505,8 +1642,8 @@ struct wireless_dev { struct list_head list; struct net_device *netdev; - struct list_head action_registrations; - spinlock_t action_registrations_lock; + struct list_head mgmt_registrations; + spinlock_t mgmt_registrations_lock; struct mutex mtx; @@ -1563,8 +1700,10 @@ static inline void *wdev_priv(struct wireless_dev *wdev) return wiphy_priv(wdev->wiphy); } -/* - * Utility functions +/** + * DOC: Utility functions + * + * cfg80211 offers a number of utility functions that can be useful.
*/ /** @@ -1715,7 +1854,15 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); * ieee80211_hdrlen - get header length in bytes from frame control * @fc: frame control field in little-endian format */ -unsigned int ieee80211_hdrlen(__le16 fc); +unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc); + +/** + * DOC: Data path helpers + * + * In addition to generic utilities, cfg80211 also offers + * functions that help implement the data path for devices + * that do not do the 802.11/802.3 conversion on the device. + */ /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 @@ -1777,8 +1924,10 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb); */ const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len); -/* - * Regulatory helper functions for wiphys +/** + * DOC: Regulatory enforcement infrastructure + * + * TODO */ /** @@ -2181,6 +2330,20 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp); /** + * DOC: RFkill integration + * + * RFkill integration in cfg80211 is almost invisible to drivers, + * as cfg80211 automatically registers an rfkill instance for each + * wireless device it knows about. Soft kill is also translated + * into disconnecting and turning all interfaces off, drivers are + * expected to turn off the device when all interfaces are down. + * + * However, devices may have a hard RFkill line, in which case they + * also need to interact with the rfkill subsystem, via cfg80211. + * They can do this with a few helper functions documented here. 
+ */ + +/** * wiphy_rfkill_set_hw_state - notify cfg80211 about hw block state * @wiphy: the wiphy * @blocked: block status @@ -2201,6 +2364,17 @@ void wiphy_rfkill_stop_polling(struct wiphy *wiphy); #ifdef CONFIG_NL80211_TESTMODE /** + * DOC: Test mode + * + * Test mode is a set of utility functions to allow drivers to + * interact with driver-specific tools to aid, for instance, + * factory programming. + * + * This chapter describes how drivers interact with it, for more + * information see the nl80211 book's chapter on it. + */ + +/** * cfg80211_testmode_alloc_reply_skb - allocate testmode reply * @wiphy: the wiphy * @approxlen: an upper bound of the length of the data that will @@ -2373,38 +2547,39 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); /** - * cfg80211_rx_action - notification of received, unprocessed Action frame + * cfg80211_rx_mgmt - notification of received, unprocessed management frame * @dev: network device * @freq: Frequency on which the frame was received in MHz - * @buf: Action frame (header + body) + * @buf: Management frame (header + body) * @len: length of the frame data * @gfp: context flags - * Returns %true if a user space application is responsible for rejecting the - * unrecognized Action frame; %false if no such application is registered - * (i.e., the driver is responsible for rejecting the unrecognized Action - * frame) + * + * Returns %true if a user space application has registered for this frame. + * For action frames, that makes it responsible for rejecting unrecognized + * action frames; %false otherwise, in which case for action frames the + * driver is responsible for rejecting the frame. * * This function is called whenever an Action frame is received for a station * mode interface, but is not processed in kernel. 
*/ -bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, - size_t len, gfp_t gfp); +bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf, + size_t len, gfp_t gfp); /** - * cfg80211_action_tx_status - notification of TX status for Action frame + * cfg80211_mgmt_tx_status - notification of TX status for management frame * @dev: network device - * @cookie: Cookie returned by cfg80211_ops::action() - * @buf: Action frame (header + body) + * @cookie: Cookie returned by cfg80211_ops::mgmt_tx() + * @buf: Management frame (header + body) * @len: length of the frame data * @ack: Whether frame was acknowledged * @gfp: context flags * - * This function is called whenever an Action frame was requested to be - * transmitted with cfg80211_ops::action() to report the TX status of the + * This function is called whenever a management frame was requested to be + * transmitted with cfg80211_ops::mgmt_tx() to report the TX status of the * transmission attempt. */ -void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, - const u8 *buf, size_t len, bool ack, gfp_t gfp); +void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie, + const u8 *buf, size_t len, bool ack, gfp_t gfp); /** @@ -2420,56 +2595,41 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); -#ifdef __KERNEL__ - /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ #define wiphy_printk(level, wiphy, format, args...) \ - printk(level "%s: " format, wiphy_name(wiphy), ##args) + dev_printk(level, &(wiphy)->dev, format, ##args) #define wiphy_emerg(wiphy, format, args...) \ - wiphy_printk(KERN_EMERG, wiphy, format, ##args) + dev_emerg(&(wiphy)->dev, format, ##args) #define wiphy_alert(wiphy, format, args...) \ - wiphy_printk(KERN_ALERT, wiphy, format, ##args) + dev_alert(&(wiphy)->dev, format, ##args) #define wiphy_crit(wiphy, format, args...) 
\ - wiphy_printk(KERN_CRIT, wiphy, format, ##args) + dev_crit(&(wiphy)->dev, format, ##args) #define wiphy_err(wiphy, format, args...) \ - wiphy_printk(KERN_ERR, wiphy, format, ##args) + dev_err(&(wiphy)->dev, format, ##args) #define wiphy_warn(wiphy, format, args...) \ - wiphy_printk(KERN_WARNING, wiphy, format, ##args) + dev_warn(&(wiphy)->dev, format, ##args) #define wiphy_notice(wiphy, format, args...) \ - wiphy_printk(KERN_NOTICE, wiphy, format, ##args) + dev_notice(&(wiphy)->dev, format, ##args) #define wiphy_info(wiphy, format, args...) \ - wiphy_printk(KERN_INFO, wiphy, format, ##args) + dev_info(&(wiphy)->dev, format, ##args) -int wiphy_debug(const struct wiphy *wiphy, const char *format, ...) - __attribute__ ((format (printf, 2, 3))); - -#if defined(DEBUG) -#define wiphy_dbg(wiphy, format, args...) \ +#define wiphy_debug(wiphy, format, args...) \ wiphy_printk(KERN_DEBUG, wiphy, format, ##args) -#elif defined(CONFIG_DYNAMIC_DEBUG) + #define wiphy_dbg(wiphy, format, args...) \ - dynamic_pr_debug("%s: " format, wiphy_name(wiphy), ##args) -#else -#define wiphy_dbg(wiphy, format, args...) \ -({ \ - if (0) \ - wiphy_printk(KERN_DEBUG, wiphy, format, ##args); \ - 0; \ -}) -#endif + dev_dbg(&(wiphy)->dev, format, ##args) #if defined(VERBOSE_DEBUG) #define wiphy_vdbg wiphy_dbg #else - #define wiphy_vdbg(wiphy, format, args...) \ ({ \ if (0) \ wiphy_printk(KERN_DEBUG, wiphy, format, ##args); \ - 0; \ + 0; \ }) #endif @@ -2481,6 +2641,4 @@ int wiphy_debug(const struct wiphy *wiphy, const char *format, ...) #define wiphy_WARN(wiphy, format, args...) 
\ WARN(1, "wiphy: %s\n" format, wiphy_name(wiphy), ##args); -#endif - #endif /* __NET_CFG80211_H */ diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index ef6c24a529e1..a4dc5b027bd9 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -51,7 +51,8 @@ static inline u32 task_cls_classid(struct task_struct *p) return 0; rcu_read_lock(); - id = rcu_dereference(net_cls_subsys_id); + id = rcu_dereference_index_check(net_cls_subsys_id, + rcu_read_lock_held()); if (id >= 0) classid = container_of(task_subsys_state(p, id), struct cgroup_cls_state, css)->classid; diff --git a/include/net/dst.h b/include/net/dst.h index 02386505033d..a217c838ec0d 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -43,10 +43,11 @@ struct dst_entry { short error; short obsolete; int flags; -#define DST_HOST 1 -#define DST_NOXFRM 2 -#define DST_NOPOLICY 4 -#define DST_NOHASH 8 +#define DST_HOST 0x0001 +#define DST_NOXFRM 0x0002 +#define DST_NOPOLICY 0x0004 +#define DST_NOHASH 0x0008 +#define DST_NOCACHE 0x0010 unsigned long expires; unsigned short header_len; /* more space at head required */ @@ -228,23 +229,37 @@ static inline void skb_dst_force(struct sk_buff *skb) /** + * __skb_tunnel_rx - prepare skb for rx reinsert + * @skb: buffer + * @dev: tunnel device + * + * After decapsulation, packet is going to re-enter (netif_rx()) our stack, + * so make some cleanups. (no accounting done) + */ +static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) +{ + skb->dev = dev; + skb->rxhash = 0; + skb_set_queue_mapping(skb, 0); + skb_dst_drop(skb); + nf_reset(skb); +} + +/** * skb_tunnel_rx - prepare skb for rx reinsert * @skb: buffer * @dev: tunnel device * * After decapsulation, packet is going to re-enter (netif_rx()) our stack, * so make some cleanups, and perform accounting. + * Note: this accounting is not SMP safe. 
*/ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) { - skb->dev = dev; /* TODO : stats should be SMP safe */ dev->stats.rx_packets++; dev->stats.rx_bytes += skb->len; - skb->rxhash = 0; - skb_set_queue_mapping(skb, 0); - skb_dst_drop(skb); - nf_reset(skb); + __skb_tunnel_rx(skb, dev); } /* Children define the path of the packet through the diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index d1ff9b7e99b8..1fa5306e3e23 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -1,6 +1,7 @@ #ifndef _NET_DST_OPS_H #define _NET_DST_OPS_H #include <linux/types.h> +#include <linux/percpu_counter.h> struct dst_entry; struct kmem_cachep; @@ -22,7 +23,41 @@ struct dst_ops { void (*update_pmtu)(struct dst_entry *dst, u32 mtu); int (*local_out)(struct sk_buff *skb); - atomic_t entries; struct kmem_cache *kmem_cachep; + + struct percpu_counter pcpuc_entries ____cacheline_aligned_in_smp; }; + +static inline int dst_entries_get_fast(struct dst_ops *dst) +{ + return percpu_counter_read_positive(&dst->pcpuc_entries); +} + +static inline int dst_entries_get_slow(struct dst_ops *dst) +{ + int res; + + local_bh_disable(); + res = percpu_counter_sum_positive(&dst->pcpuc_entries); + local_bh_enable(); + return res; +} + +static inline void dst_entries_add(struct dst_ops *dst, int val) +{ + local_bh_disable(); + percpu_counter_add(&dst->pcpuc_entries, val); + local_bh_enable(); +} + +static inline int dst_entries_init(struct dst_ops *dst) +{ + return percpu_counter_init(&dst->pcpuc_entries, 0); +} + +static inline void dst_entries_destroy(struct dst_ops *dst) +{ + percpu_counter_destroy(&dst->pcpuc_entries); +} + #endif diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index e8923bc20f9f..106f3097d384 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -31,6 +31,8 @@ struct fib_lookup_arg { void *lookup_ptr; void *result; struct fib_rule *rule; + int flags; +#define FIB_LOOKUP_NOREF 1 }; struct fib_rules_ops 
{ @@ -106,7 +108,6 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) extern struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *, struct net *); extern void fib_rules_unregister(struct fib_rules_ops *); -extern void fib_rules_cleanup_ops(struct fib_rules_ops *); extern int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, diff --git a/include/net/flow.h b/include/net/flow.h index bb08692a20b0..0ac3fb5e0973 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -49,6 +49,7 @@ struct flowi { __u8 proto; __u8 flags; #define FLOWI_FLAG_ANYSRC 0x01 +#define FLOWI_FLAG_MATCH_ANY_IIF 0x02 union { struct { __be16 sport; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index f7dcd2c70412..8a64b811a39a 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -20,6 +20,9 @@ struct genl_multicast_group { u32 id; }; +struct genl_ops; +struct genl_info; + /** * struct genl_family - generic netlink family * @id: protocol family idenfitier @@ -29,6 +32,10 @@ struct genl_multicast_group { * @maxattr: maximum number of attributes supported * @netnsok: set to true if the family can handle network * namespaces and should be presented in all of them + * @pre_doit: called before an operation's doit callback, it may + * do additional, common, filtering and return an error + * @post_doit: called after an operation's doit callback, it may + * undo operations done by pre_doit, for example release locks * @attrbuf: buffer to store parsed attributes * @ops_list: list of all assigned operations * @family_list: family list @@ -41,6 +48,12 @@ struct genl_family { unsigned int version; unsigned int maxattr; bool netnsok; + int (*pre_doit)(struct genl_ops *ops, + struct sk_buff *skb, + struct genl_info *info); + void (*post_doit)(struct genl_ops *ops, + struct sk_buff *skb, + struct genl_info *info); struct nlattr ** attrbuf; /* private */ struct list_head ops_list; /* private */ struct list_head 
family_list; /* private */ @@ -55,6 +68,8 @@ struct genl_family { * @genlhdr: generic netlink message header * @userhdr: user specific header * @attrs: netlink attributes + * @_net: network namespace + * @user_ptr: user pointers */ struct genl_info { u32 snd_seq; @@ -66,6 +81,7 @@ struct genl_info { #ifdef CONFIG_NET_NS struct net * _net; #endif + void * user_ptr[2]; }; static inline struct net *genl_info_net(struct genl_info *info) @@ -81,6 +97,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) /** * struct genl_ops - generic netlink operations * @cmd: command identifier + * @internal_flags: flags used by the family * @flags: flags * @policy: attribute validation policy * @doit: standard command callback @@ -90,6 +107,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) */ struct genl_ops { u8 cmd; + u8 internal_flags; unsigned int flags; const struct nla_policy *policy; int (*doit)(struct sk_buff *skb, diff --git a/include/net/gre.h b/include/net/gre.h new file mode 100644 index 000000000000..82665474bcb7 --- /dev/null +++ b/include/net/gre.h @@ -0,0 +1,18 @@ +#ifndef __LINUX_GRE_H +#define __LINUX_GRE_H + +#include <linux/skbuff.h> + +#define GREPROTO_CISCO 0 +#define GREPROTO_PPTP 1 +#define GREPROTO_MAX 2 + +struct gre_protocol { + int (*handler)(struct sk_buff *skb); + void (*err_handler)(struct sk_buff *skb, u32 info); +}; + +int gre_add_protocol(const struct gre_protocol *proto, u8 version); +int gre_del_protocol(const struct gre_protocol *proto, u8 version); + +#endif diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b6d3b55da19b..e4f494b42e06 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -125,6 +125,7 @@ struct inet_connection_sock { int probe_size; } icsk_mtup; u32 icsk_ca_priv[16]; + u32 icsk_user_timeout; #define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) }; diff --git a/include/net/inet_ecn.h 
b/include/net/inet_ecn.h index 9b5d08f4f6e8..88bdd010d65d 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -27,7 +27,7 @@ static inline int INET_ECN_is_not_ect(__u8 dsfield) static inline int INET_ECN_is_capable(__u8 dsfield) { - return (dsfield & INET_ECN_ECT_0); + return dsfield & INET_ECN_ECT_0; } static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 74358d1b3f43..e9c2ed8af864 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -245,7 +245,7 @@ static inline int inet_sk_listen_hashfn(const struct sock *sk) } /* Caller must disable local BH processing. */ -extern void __inet_inherit_port(struct sock *sk, struct sock *child); +extern int __inet_inherit_port(struct sock *sk, struct sock *child); extern void inet_put_port(struct sock *sk); diff --git a/include/net/ip.h b/include/net/ip.h index 890f9725d681..dbee3fe260e1 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -53,7 +53,7 @@ struct ipcm_cookie { __be32 addr; int oif; struct ip_options *opt; - union skb_shared_tx shtx; + __u8 tx_flags; }; #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) @@ -238,9 +238,9 @@ int ip_decrease_ttl(struct iphdr *iph) static inline int ip_dont_fragment(struct sock *sk, struct dst_entry *dst) { - return (inet_sk(sk)->pmtudisc == IP_PMTUDISC_DO || + return inet_sk(sk)->pmtudisc == IP_PMTUDISC_DO || (inet_sk(sk)->pmtudisc == IP_PMTUDISC_WANT && - !(dst_metric_locked(dst, RTAX_MTU)))); + !(dst_metric_locked(dst, RTAX_MTU))); } extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c93f94edc610..ba3666d31766 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -86,6 +86,7 @@ struct fib_info { #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_power; #endif + struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev }; @@ 
-148,7 +149,7 @@ struct fib_table { }; extern int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, - struct fib_result *res); + struct fib_result *res, int fib_flags); extern int fib_table_insert(struct fib_table *, struct fib_config *); extern int fib_table_delete(struct fib_table *, struct fib_config *); extern int fib_table_dump(struct fib_table *table, struct sk_buff *skb, @@ -185,11 +186,11 @@ static inline int fib_lookup(struct net *net, const struct flowi *flp, struct fib_table *table; table = fib_get_table(net, RT_TABLE_LOCAL); - if (!fib_table_lookup(table, flp, res)) + if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF)) return 0; table = fib_get_table(net, RT_TABLE_MAIN); - if (!fib_table_lookup(table, flp, res)) + if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF)) return 0; return -ENETUNREACH; } @@ -254,16 +255,6 @@ static inline void fib_info_put(struct fib_info *fi) free_fib_info(fi); } -static inline void fib_res_put(struct fib_result *res) -{ - if (res->fi) - fib_info_put(res->fi); -#ifdef CONFIG_IP_MULTIPLE_TABLES - if (res->r) - fib_rule_put(res->r); -#endif -} - #ifdef CONFIG_PROC_FS extern int __net_init fib_proc_init(struct net *net); extern void __net_exit fib_proc_exit(struct net *net); diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index f976885f686f..b7bbd6c28cfa 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -25,7 +25,9 @@ #include <linux/ip.h> #include <linux/ipv6.h> /* for struct ipv6hdr */ #include <net/ipv6.h> /* for ipv6_addr_copy */ - +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#include <net/netfilter/nf_conntrack.h> +#endif /* Connections' size value needed by ip_vs_ctl.c */ extern int ip_vs_conn_tab_size; @@ -134,24 +136,24 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, if (net_ratelimit()) \ printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__); \ } while (0) -#define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) \ +#define 
IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg) \ do { \ if (level <= ip_vs_get_debug_level()) \ - pp->debug_packet(pp, skb, ofs, msg); \ + pp->debug_packet(af, pp, skb, ofs, msg); \ } while (0) -#define IP_VS_DBG_RL_PKT(level, pp, skb, ofs, msg) \ +#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg) \ do { \ if (level <= ip_vs_get_debug_level() && \ net_ratelimit()) \ - pp->debug_packet(pp, skb, ofs, msg); \ + pp->debug_packet(af, pp, skb, ofs, msg); \ } while (0) #else /* NO DEBUGGING at ALL */ #define IP_VS_DBG_BUF(level, msg...) do {} while (0) #define IP_VS_ERR_BUF(msg...) do {} while (0) #define IP_VS_DBG(level, msg...) do {} while (0) #define IP_VS_DBG_RL(msg...) do {} while (0) -#define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) do {} while (0) -#define IP_VS_DBG_RL_PKT(level, pp, skb, ofs, msg) do {} while (0) +#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg) do {} while (0) +#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg) do {} while (0) #endif #define IP_VS_BUG() BUG() @@ -343,7 +345,7 @@ struct ip_vs_protocol { int (*app_conn_bind)(struct ip_vs_conn *cp); - void (*debug_packet)(struct ip_vs_protocol *pp, + void (*debug_packet)(int af, struct ip_vs_protocol *pp, const struct sk_buff *skb, int offset, const char *msg); @@ -355,6 +357,19 @@ struct ip_vs_protocol { extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto); +struct ip_vs_conn_param { + const union nf_inet_addr *caddr; + const union nf_inet_addr *vaddr; + __be16 cport; + __be16 vport; + __u16 protocol; + u16 af; + + const struct ip_vs_pe *pe; + char *pe_data; + __u8 pe_data_len; +}; + /* * IP_VS structure allocated for each dynamically scheduled connection */ @@ -366,6 +381,7 @@ struct ip_vs_conn { union nf_inet_addr caddr; /* client address */ union nf_inet_addr vaddr; /* virtual address */ union nf_inet_addr daddr; /* destination address */ + volatile __u32 flags; /* status flags */ __be16 cport; __be16 vport; __be16 dport; @@ -378,7 +394,6 @@ struct ip_vs_conn { /* Flags 
and state transition */ spinlock_t lock; /* lock for state transition */ - volatile __u16 flags; /* status flags */ volatile __u16 state; /* state info */ volatile __u16 old_state; /* old state, to be used for * state transition triggerd @@ -394,6 +409,7 @@ struct ip_vs_conn { /* packet transmitter for different forwarding methods. If it mangles the packet, it must return NF_DROP or better NF_STOLEN, otherwise this must be changed to a sk_buff **. + NF_ACCEPT can be returned when destination is local. */ int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); @@ -405,6 +421,9 @@ struct ip_vs_conn { void *app_data; /* Application private data */ struct ip_vs_seq in_seq; /* incoming seq. struct */ struct ip_vs_seq out_seq; /* outgoing seq. struct */ + + char *pe_data; + __u8 pe_data_len; }; @@ -426,6 +445,7 @@ struct ip_vs_service_user_kern { /* virtual service options */ char *sched_name; + char *pe_name; unsigned flags; /* virtual service flags */ unsigned timeout; /* persistent timeout in sec */ u32 netmask; /* persistent netmask */ @@ -475,6 +495,9 @@ struct ip_vs_service { struct ip_vs_scheduler *scheduler; /* bound scheduler object */ rwlock_t sched_lock; /* lock sched_data */ void *sched_data; /* scheduler application data */ + + /* alternate persistence engine */ + struct ip_vs_pe *pe; }; @@ -507,6 +530,10 @@ struct ip_vs_dest { spinlock_t dst_lock; /* lock of dst_cache */ struct dst_entry *dst_cache; /* destination cache entry */ u32 dst_rtos; /* RT_TOS(tos) for dst */ + u32 dst_cookie; +#ifdef CONFIG_IP_VS_IPV6 + struct in6_addr dst_saddr; +#endif /* for virtual service */ struct ip_vs_service *svc; /* service it belongs to */ @@ -538,6 +565,21 @@ struct ip_vs_scheduler { const struct sk_buff *skb); }; +/* The persistence engine object */ +struct ip_vs_pe { + struct list_head n_list; /* d-linked list head */ + char *name; /* scheduler name */ + atomic_t refcnt; /* reference counter */ + struct module *module; /* 
THIS_MODULE/NULL */ + + /* get the connection template, if any */ + int (*fill_param)(struct ip_vs_conn_param *p, struct sk_buff *skb); + bool (*ct_match)(const struct ip_vs_conn_param *p, + struct ip_vs_conn *ct); + u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval, + bool inverse); + int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf); +}; /* * The application module object (a.k.a. app incarnation) @@ -556,11 +598,19 @@ struct ip_vs_app { __be16 port; /* port number in net order */ atomic_t usecnt; /* usage counter */ - /* output hook: return false if can't linearize. diff set for TCP. */ + /* + * output hook: Process packet in inout direction, diff set for TCP. + * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok, + * 2=Mangled but checksum was not updated + */ int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *, struct sk_buff *, int *diff); - /* input hook: return false if can't linearize. diff set for TCP. */ + /* + * input hook: Process packet in outin direction, diff set for TCP. 
+ * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok, + * 2=Mangled but checksum was not updated + */ int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *, struct sk_buff *, int *diff); @@ -624,13 +674,25 @@ enum { IP_VS_DIR_LAST, }; -extern struct ip_vs_conn *ip_vs_conn_in_get -(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, - const union nf_inet_addr *d_addr, __be16 d_port); +static inline void ip_vs_conn_fill_param(int af, int protocol, + const union nf_inet_addr *caddr, + __be16 cport, + const union nf_inet_addr *vaddr, + __be16 vport, + struct ip_vs_conn_param *p) +{ + p->af = af; + p->protocol = protocol; + p->caddr = caddr; + p->cport = cport; + p->vaddr = vaddr; + p->vport = vport; + p->pe = NULL; + p->pe_data = NULL; +} -extern struct ip_vs_conn *ip_vs_ct_in_get -(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, - const union nf_inet_addr *d_addr, __be16 d_port); +struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p); +struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p); struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, @@ -638,9 +700,7 @@ struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, unsigned int proto_off, int inverse); -extern struct ip_vs_conn *ip_vs_conn_out_get -(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, - const union nf_inet_addr *d_addr, __be16 d_port); +struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p); struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, @@ -656,11 +716,10 @@ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) extern void ip_vs_conn_put(struct ip_vs_conn *cp); extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport); -extern struct ip_vs_conn * -ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 
cport, - const union nf_inet_addr *vaddr, __be16 vport, - const union nf_inet_addr *daddr, __be16 dport, unsigned flags, - struct ip_vs_dest *dest); +struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, + const union nf_inet_addr *daddr, + __be16 dport, unsigned flags, + struct ip_vs_dest *dest); extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp); extern const char * ip_vs_state_name(__u16 proto, int state); @@ -751,6 +810,12 @@ extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); extern int ip_vs_app_init(void); extern void ip_vs_app_cleanup(void); +void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe); +void ip_vs_unbind_pe(struct ip_vs_service *svc); +int register_ip_vs_pe(struct ip_vs_pe *pe); +int unregister_ip_vs_pe(struct ip_vs_pe *pe); +extern struct ip_vs_pe *ip_vs_pe_get(const char *name); +extern void ip_vs_pe_put(struct ip_vs_pe *pe); /* * IPVS protocol functions (from ip_vs_proto.c) @@ -763,7 +828,8 @@ extern int ip_vs_set_state_timeout(int *table, int num, const char *const *names, const char *name, int to); extern void -ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, +ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, + const struct sk_buff *skb, int offset, const char *msg); extern struct ip_vs_protocol ip_vs_protocol_tcp; @@ -785,7 +851,8 @@ extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc); extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern struct ip_vs_conn * -ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb); +ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, + struct ip_vs_protocol *pp, int *ignored); extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_protocol *pp); @@ -798,6 +865,8 @@ extern int sysctl_ip_vs_expire_nodest_conn; extern int sysctl_ip_vs_expire_quiescent_template; 
extern int sysctl_ip_vs_sync_threshold[2]; extern int sysctl_ip_vs_nat_icmp_send; +extern int sysctl_ip_vs_conntrack; +extern int sysctl_ip_vs_snat_reroute; extern struct ip_vs_stats ip_vs_stats; extern const struct ctl_path net_vs_ctl_path[]; @@ -955,8 +1024,65 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) return csum_partial(diff, sizeof(diff), oldsum); } +/* + * Forget current conntrack (unconfirmed) and attach notrack entry + */ +static inline void ip_vs_notrack(struct sk_buff *skb) +{ +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); + + if (!ct || !nf_ct_is_untracked(ct)) { + nf_reset(skb); + skb->nfct = &nf_ct_untracked_get()->ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); + } +#endif +} + +#ifdef CONFIG_IP_VS_NFCT +/* + * Netfilter connection tracking + * (from ip_vs_nfct.c) + */ +static inline int ip_vs_conntrack_enabled(void) +{ + return sysctl_ip_vs_conntrack; +} + extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin); +extern int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp); +extern void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, + struct ip_vs_conn *cp, u_int8_t proto, + const __be16 port, int from_rs); +extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp); + +#else + +static inline int ip_vs_conntrack_enabled(void) +{ + return 0; +} + +static inline void ip_vs_update_conntrack(struct sk_buff *skb, + struct ip_vs_conn *cp, int outin) +{ +} + +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, + struct ip_vs_conn *cp) +{ + return NF_ACCEPT; +} + +static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) +{ +} +/* CONFIG_IP_VS_NFCT */ +#endif #endif /* __KERNEL__ */ diff --git a/include/net/ipip.h b/include/net/ipip.h index 65caea8b414f..58abbf966b0c 100644 --- 
a/include/net/ipip.h +++ b/include/net/ipip.h @@ -45,7 +45,7 @@ struct ip_tunnel_prl_entry { struct rcu_head rcu_head; }; -#define IPTUNNEL_XMIT() do { \ +#define __IPTUNNEL_XMIT(stats1, stats2) do { \ int err; \ int pkt_len = skb->len - skb_transport_offset(skb); \ \ @@ -54,12 +54,14 @@ struct ip_tunnel_prl_entry { \ err = ip_local_out(skb); \ if (likely(net_xmit_eval(err) == 0)) { \ - txq->tx_bytes += pkt_len; \ - txq->tx_packets++; \ + (stats1)->tx_bytes += pkt_len; \ + (stats1)->tx_packets++; \ } else { \ - stats->tx_errors++; \ - stats->tx_aborted_errors++; \ + (stats2)->tx_errors++; \ + (stats2)->tx_aborted_errors++; \ } \ } while (0) +#define IPTUNNEL_XMIT() __IPTUNNEL_XMIT(txq, stats) + #endif diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 1f8412410998..4a3cd2cd2f5e 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -262,7 +262,7 @@ static inline int ipv6_addr_scope(const struct in6_addr *addr) static inline int __ipv6_addr_src_scope(int type) { - return (type == IPV6_ADDR_ANY ? __IPV6_ADDR_SCOPE_INVALID : (type >> 16)); + return (type == IPV6_ADDR_ANY) ? 
__IPV6_ADDR_SCOPE_INVALID : (type >> 16); } static inline int ipv6_addr_src_scope(const struct in6_addr *addr) @@ -279,10 +279,10 @@ static inline int ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m, const struct in6_addr *a2) { - return (!!(((a1->s6_addr32[0] ^ a2->s6_addr32[0]) & m->s6_addr32[0]) | - ((a1->s6_addr32[1] ^ a2->s6_addr32[1]) & m->s6_addr32[1]) | - ((a1->s6_addr32[2] ^ a2->s6_addr32[2]) & m->s6_addr32[2]) | - ((a1->s6_addr32[3] ^ a2->s6_addr32[3]) & m->s6_addr32[3]))); + return !!(((a1->s6_addr32[0] ^ a2->s6_addr32[0]) & m->s6_addr32[0]) | + ((a1->s6_addr32[1] ^ a2->s6_addr32[1]) & m->s6_addr32[1]) | + ((a1->s6_addr32[2] ^ a2->s6_addr32[2]) & m->s6_addr32[2]) | + ((a1->s6_addr32[3] ^ a2->s6_addr32[3]) & m->s6_addr32[3])); } static inline void ipv6_addr_copy(struct in6_addr *a1, const struct in6_addr *a2) @@ -317,10 +317,10 @@ static inline void ipv6_addr_set(struct in6_addr *addr, static inline int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) { - return (((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | - (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | - (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | - (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0); + return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | + (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | + (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | + (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; } static inline int __ipv6_prefix_equal(const __be32 *a1, const __be32 *a2, @@ -373,20 +373,20 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a); static inline int ipv6_addr_any(const struct in6_addr *a) { - return ((a->s6_addr32[0] | a->s6_addr32[1] | - a->s6_addr32[2] | a->s6_addr32[3] ) == 0); + return (a->s6_addr32[0] | a->s6_addr32[1] | + a->s6_addr32[2] | a->s6_addr32[3]) == 0; } static inline int ipv6_addr_loopback(const struct in6_addr *a) { - return ((a->s6_addr32[0] | a->s6_addr32[1] | - a->s6_addr32[2] | (a->s6_addr32[3] ^ htonl(1))) == 0); + return (a->s6_addr32[0] | a->s6_addr32[1] | 
+ a->s6_addr32[2] | (a->s6_addr32[3] ^ htonl(1))) == 0; } static inline int ipv6_addr_v4mapped(const struct in6_addr *a) { - return ((a->s6_addr32[0] | a->s6_addr32[1] | - (a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0); + return (a->s6_addr32[0] | a->s6_addr32[1] | + (a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0; } /* @@ -395,8 +395,7 @@ static inline int ipv6_addr_v4mapped(const struct in6_addr *a) */ static inline int ipv6_addr_orchid(const struct in6_addr *a) { - return ((a->s6_addr32[0] & htonl(0xfffffff0)) - == htonl(0x20010010)); + return (a->s6_addr32[0] & htonl(0xfffffff0)) == htonl(0x20010010); } static inline void ipv6_addr_set_v4mapped(const __be32 addr, @@ -441,7 +440,7 @@ static inline int __ipv6_addr_diff(const void *token1, const void *token2, int a * if returned value is greater than prefix length. * --ANK (980803) */ - return (addrlen << 5); + return addrlen << 5; } static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_addr *a2) diff --git a/include/net/irda/irlan_common.h b/include/net/irda/irlan_common.h index 73cacb3ac16c..0af8b8dfbc22 100644 --- a/include/net/irda/irlan_common.h +++ b/include/net/irda/irlan_common.h @@ -171,7 +171,6 @@ struct irlan_cb { int magic; struct list_head dev_list; struct net_device *dev; /* Ethernet device structure*/ - struct net_device_stats stats; __u32 saddr; /* Source device address */ __u32 daddr; /* Destination device address */ diff --git a/include/net/irda/irlan_event.h b/include/net/irda/irlan_event.h index 6d9539f05806..018b5a77e610 100644 --- a/include/net/irda/irlan_event.h +++ b/include/net/irda/irlan_event.h @@ -67,7 +67,7 @@ typedef enum { IRLAN_WATCHDOG_TIMEOUT, } IRLAN_EVENT; -extern char *irlan_state[]; +extern const char * const irlan_state[]; void irlan_do_client_event(struct irlan_cb *self, IRLAN_EVENT event, struct sk_buff *skb); diff --git a/include/net/irda/irlap.h b/include/net/irda/irlap.h index 9d0c78ea92f5..17fcd964f9d9 100644 --- a/include/net/irda/irlap.h +++ 
b/include/net/irda/irlap.h @@ -282,7 +282,7 @@ static inline int irlap_is_primary(struct irlap_cb *self) default: ret = -1; } - return(ret); + return ret; } /* Clear a pending IrLAP disconnect. - Jean II */ diff --git a/include/net/irda/irlmp.h b/include/net/irda/irlmp.h index 3ffc1d0f93d6..fff11b7fe8a4 100644 --- a/include/net/irda/irlmp.h +++ b/include/net/irda/irlmp.h @@ -274,7 +274,7 @@ static inline int irlmp_lap_tx_queue_full(struct lsap_cb *self) if (self->lap->irlap == NULL) return 0; - return(IRLAP_GET_TX_QUEUE_LEN(self->lap->irlap) >= LAP_HIGH_THRESHOLD); + return IRLAP_GET_TX_QUEUE_LEN(self->lap->irlap) >= LAP_HIGH_THRESHOLD; } /* After doing a irlmp_dup(), this get one of the two socket back into diff --git a/include/net/irda/irttp.h b/include/net/irda/irttp.h index 11aee7a2972a..af4b87721d13 100644 --- a/include/net/irda/irttp.h +++ b/include/net/irda/irttp.h @@ -204,7 +204,7 @@ static inline int irttp_is_primary(struct tsap_cb *self) (self->lsap->lap == NULL) || (self->lsap->lap->irlap == NULL)) return -2; - return(irlap_is_primary(self->lsap->lap->irlap)); + return irlap_is_primary(self->lsap->lap->irlap); } #endif /* IRTTP_H */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b0787a1dea90..9fdf982d1286 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -149,6 +149,7 @@ struct ieee80211_low_level_stats { * @BSS_CHANGED_ARP_FILTER: Hardware ARP filter address list or state changed. * @BSS_CHANGED_QOS: QoS for this association was enabled/disabled. Note * that it is only ever disabled for station mode. + * @BSS_CHANGED_IDLE: Idle changed for this BSS/interface. 
*/ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -165,6 +166,7 @@ enum ieee80211_bss_change { BSS_CHANGED_IBSS = 1<<11, BSS_CHANGED_ARP_FILTER = 1<<12, BSS_CHANGED_QOS = 1<<13, + BSS_CHANGED_IDLE = 1<<14, /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -223,6 +225,9 @@ enum ieee80211_bss_change { * hardware must not perform any ARP filtering. Note, that the filter will * be enabled also in promiscuous mode. * @qos: This is a QoS-enabled BSS. + * @idle: This interface is idle. There's also a global idle flag in the + * hardware config which may be more appropriate depending on what + * your driver/device needs to do. */ struct ieee80211_bss_conf { const u8 *bssid; @@ -247,6 +252,7 @@ struct ieee80211_bss_conf { u8 arp_addr_cnt; bool arp_filter_enabled; bool qos; + bool idle; }; /** @@ -315,6 +321,9 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_CTL_LDPC: tells the driver to use LDPC for this frame * @IEEE80211_TX_CTL_STBC: Enables Space-Time Block Coding (STBC) for this * frame and selects the maximum number of streams that it can use. + * + * Note: If you have to add new flags to the enumeration, then don't + * forget to update %IEEE80211_TX_TEMPORARY_FLAGS when necessary. */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), @@ -344,6 +353,19 @@ enum mac80211_tx_control_flags { #define IEEE80211_TX_CTL_STBC_SHIFT 23 +/* + * This definition is used as a mask to clear all temporary flags, which are + * set by the tx handlers for each transmission attempt by the mac80211 stack. 
+ */ +#define IEEE80211_TX_TEMPORARY_FLAGS (IEEE80211_TX_CTL_NO_ACK | \ + IEEE80211_TX_CTL_CLEAR_PS_FILT | IEEE80211_TX_CTL_FIRST_FRAGMENT | \ + IEEE80211_TX_CTL_SEND_AFTER_DTIM | IEEE80211_TX_CTL_AMPDU | \ + IEEE80211_TX_STAT_TX_FILTERED | IEEE80211_TX_STAT_ACK | \ + IEEE80211_TX_STAT_AMPDU | IEEE80211_TX_STAT_AMPDU_NO_BACK | \ + IEEE80211_TX_CTL_RATE_CTRL_PROBE | IEEE80211_TX_CTL_PSPOLL_RESPONSE | \ + IEEE80211_TX_CTL_MORE_FRAMES | IEEE80211_TX_CTL_LDPC | \ + IEEE80211_TX_CTL_STBC) + /** * enum mac80211_rate_control_flags - per-rate flags set by the * Rate Control algorithm. @@ -559,9 +581,6 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_HT: HT MCS was used and rate_idx is MCS index * @RX_FLAG_40MHZ: HT40 (40 MHz) was used * @RX_FLAG_SHORT_GI: Short guard interval was used - * @RX_FLAG_INTERNAL_CMTR: set internally after frame was reported - * on cooked monitor to avoid double-reporting it for multiple - * virtual interfaces */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = 1<<0, @@ -575,7 +594,6 @@ enum mac80211_rx_flags { RX_FLAG_HT = 1<<9, RX_FLAG_40MHZ = 1<<10, RX_FLAG_SHORT_GI = 1<<11, - RX_FLAG_INTERNAL_CMTR = 1<<12, }; /** @@ -596,6 +614,7 @@ enum mac80211_rx_flags { * @rate_idx: index of data rate into band's supported rates or MCS index if * HT rates are use (RX_FLAG_HT) * @flag: %RX_FLAG_* + * @rx_flags: internal RX flags for mac80211 */ struct ieee80211_rx_status { u64 mactime; @@ -605,6 +624,7 @@ struct ieee80211_rx_status { int antenna; int rate_idx; int flag; + unsigned int rx_flags; }; /** @@ -763,6 +783,8 @@ struct ieee80211_channel_switch { * @bss_conf: BSS configuration for this interface, either our own * or the BSS we're associated to * @addr: address of this interface + * @p2p: indicates whether this AP or STA interface is a p2p + * interface, i.e. a GO or p2p-sta respectively * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *). 
*/ @@ -770,6 +792,7 @@ struct ieee80211_vif { enum nl80211_iftype type; struct ieee80211_bss_conf bss_conf; u8 addr[ETH_ALEN]; + bool p2p; /* must be last */ u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *)))); }; @@ -783,20 +806,6 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) } /** - * enum ieee80211_key_alg - key algorithm - * @ALG_WEP: WEP40 or WEP104 - * @ALG_TKIP: TKIP - * @ALG_CCMP: CCMP (AES) - * @ALG_AES_CMAC: AES-128-CMAC - */ -enum ieee80211_key_alg { - ALG_WEP, - ALG_TKIP, - ALG_CCMP, - ALG_AES_CMAC, -}; - -/** * enum ieee80211_key_flags - key flags * * These flags are used for communication about keys between the driver @@ -833,7 +842,7 @@ enum ieee80211_key_flags { * @hw_key_idx: To be set by the driver, this is the key index the driver * wants to be given when a frame is transmitted and needs to be * encrypted in hardware. - * @alg: The key algorithm. + * @cipher: The key's cipher suite selector. * @flags: key flags, see &enum ieee80211_key_flags. * @keyidx: the key index (0-3) * @keylen: key material length @@ -846,7 +855,7 @@ enum ieee80211_key_flags { * @iv_len: The IV length for this key type */ struct ieee80211_key_conf { - enum ieee80211_key_alg alg; + u32 cipher; u8 icv_len; u8 iv_len; u8 hw_key_idx; @@ -1032,6 +1041,13 @@ enum ieee80211_tkip_key_type { * @IEEE80211_HW_NEED_DTIM_PERIOD: * This device needs to know the DTIM period for the BSS before * associating. + * + * @IEEE80211_HW_SUPPORTS_PER_STA_GTK: The device's crypto engine supports + * per-station GTKs as used by IBSS RSN or during fast transition. If + * the device doesn't support per-station GTKs, but can be asked not + * to decrypt group addressed frames, then IBSS RSN support is still + * possible but software crypto will be used. Advertise the wiphy flag + * only in that case. 
*/ enum ieee80211_hw_flags { IEEE80211_HW_HAS_RATE_CONTROL = 1<<0, @@ -1055,6 +1071,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_REPORTS_TX_ACK_STATUS = 1<<18, IEEE80211_HW_CONNECTION_MONITOR = 1<<19, IEEE80211_HW_SUPPORTS_CQM_RSSI = 1<<20, + IEEE80211_HW_SUPPORTS_PER_STA_GTK = 1<<21, }; /** @@ -1100,8 +1117,15 @@ enum ieee80211_hw_flags { * @sta_data_size: size (in bytes) of the drv_priv data area * within &struct ieee80211_sta. * - * @max_rates: maximum number of alternate rate retry stages + * @max_rates: maximum number of alternate rate retry stages the hw + * can handle. + * @max_report_rates: maximum number of alternate rate retry stages + * the hw can report back. * @max_rate_tries: maximum number of tries for each stage + * + * @napi_weight: weight used for NAPI polling. You must specify an + * appropriate value here if a napi_poll operation is provided + * by your driver. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -1113,10 +1137,12 @@ struct ieee80211_hw { int channel_change_time; int vif_data_size; int sta_data_size; + int napi_weight; u16 queues; u16 max_listen_interval; s8 max_signal; u8 max_rates; + u8 max_report_rates; u8 max_rate_tries; }; @@ -1245,8 +1271,8 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * %IEEE80211_CONF_PS flag enabled means that the powersave mode defined in * IEEE 802.11-2007 section 11.2 is enabled. This is not to be confused * with hardware wakeup and sleep states. Driver is responsible for waking - * up the hardware before issueing commands to the hardware and putting it - * back to sleep at approriate times. + * up the hardware before issuing commands to the hardware and putting it + * back to sleep at appropriate times. * * When PS is enabled, hardware needs to wakeup for beacons and receive the * buffered multicast/broadcast frames after the beacon. 
Also it must be @@ -1267,7 +1293,7 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * there's data traffic and still saving significantly power in idle * periods. * - * Dynamic powersave is supported by simply mac80211 enabling and disabling + * Dynamic powersave is simply supported by mac80211 enabling and disabling * PS based on traffic. Driver needs to only set %IEEE80211_HW_SUPPORTS_PS * flag and mac80211 will handle everything automatically. Additionally, * hardware having support for the dynamic PS feature may set the @@ -1452,12 +1478,14 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * honour this flag if possible. * * @FIF_CONTROL: pass control frames (except for PS Poll), if PROMISC_IN_BSS - * is not set then only those addressed to this station. + * is not set then only those addressed to this station. * * @FIF_OTHER_BSS: pass frames destined to other BSSes * - * @FIF_PSPOLL: pass PS Poll frames, if PROMISC_IN_BSS is not set then only - * those addressed to this station. + * @FIF_PSPOLL: pass PS Poll frames, if PROMISC_IN_BSS is not set then only + * those addressed to this station. + * + * @FIF_PROBE_REQ: pass probe request frames */ enum ieee80211_filter_flags { FIF_PROMISC_IN_BSS = 1<<0, @@ -1468,6 +1496,7 @@ enum ieee80211_filter_flags { FIF_CONTROL = 1<<5, FIF_OTHER_BSS = 1<<6, FIF_PSPOLL = 1<<7, + FIF_PROBE_REQ = 1<<8, }; /** @@ -1540,6 +1569,12 @@ enum ieee80211_ampdu_mlme_action { * negative error code (which will be seen in userspace.) * Must be implemented and can sleep. * + * @change_interface: Called when a netdevice changes type. This callback + * is optional, but only if it is supported can interface types be + * switched while the interface is UP. The callback may sleep. + * Note that while an interface is being switched, it will not be + * found by the interface iteration callbacks. + * * @remove_interface: Notifies a driver that an interface is going down. 
* The @stop callback is called after this if it is the last interface * and no monitor interfaces are present. @@ -1687,6 +1722,8 @@ enum ieee80211_ampdu_mlme_action { * switch operation for CSAs received from the AP may implement this * callback. They must then call ieee80211_chswitch_done() to indicate * completion of the channel switch. + * + * @napi_poll: Poll Rx queue for incoming data frames. */ struct ieee80211_ops { int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); @@ -1694,6 +1731,9 @@ struct ieee80211_ops { void (*stop)(struct ieee80211_hw *hw); int (*add_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); + int (*change_interface)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + enum nl80211_iftype new_type, bool p2p); void (*remove_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int (*config)(struct ieee80211_hw *hw, u32 changed); @@ -1752,6 +1792,7 @@ struct ieee80211_ops { void (*flush)(struct ieee80211_hw *hw, bool drop); void (*channel_switch)(struct ieee80211_hw *hw, struct ieee80211_channel_switch *ch_switch); + int (*napi_poll)(struct ieee80211_hw *hw, int budget); }; /** @@ -1897,6 +1938,22 @@ void ieee80211_free_hw(struct ieee80211_hw *hw); */ void ieee80211_restart_hw(struct ieee80211_hw *hw); +/** ieee80211_napi_schedule - schedule NAPI poll + * + * Use this function to schedule NAPI polling on a device. + * + * @hw: the hardware to start polling + */ +void ieee80211_napi_schedule(struct ieee80211_hw *hw); + +/** ieee80211_napi_complete - complete NAPI polling + * + * Use this function to finish NAPI polling on a device. + * + * @hw: the hardware to stop polling + */ +void ieee80211_napi_complete(struct ieee80211_hw *hw); + /** * ieee80211_rx - receive frame * @@ -2252,7 +2309,8 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw); * * When hardware scan offload is used (i.e. 
the hw_scan() callback is * assigned) this function needs to be called by the driver to notify - * mac80211 that the scan finished. + * mac80211 that the scan finished. This function can be called from + * any context, including hardirq context. * * @hw: the hardware that finished the scan * @aborted: set to true if scan was aborted @@ -2267,6 +2325,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted); * This function allows the iterator function to sleep, when the iterator * function is atomic @ieee80211_iterate_active_interfaces_atomic can * be used. + * Does not iterate over a new interface during add_interface() * * @hw: the hardware struct of which the interfaces should be iterated over * @iterator: the iterator function to call @@ -2284,6 +2343,7 @@ void ieee80211_iterate_active_interfaces(struct ieee80211_hw *hw, * hardware that are currently active and calls the callback for them. * This function requires the iterator callback function to be atomic, * if that is not desired, use @ieee80211_iterate_active_interfaces instead. + * Does not iterate over a new interface during add_interface() * * @hw: the hardware struct of which the interfaces should be iterated over * @iterator: the iterator function to call, cannot sleep @@ -2385,25 +2445,28 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, const u8 *addr); /** - * ieee80211_find_sta_by_hw - find a station on hardware + * ieee80211_find_sta_by_ifaddr - find a station on hardware * * @hw: pointer as obtained from ieee80211_alloc_hw() - * @addr: station's address + * @addr: remote station's address + * @localaddr: local address (vif->sdata->vif.addr). Use NULL for 'any'. * * This function must be called under RCU lock and the * resulting pointer is only valid under RCU lock as well. * - * NOTE: This function should not be used! 
When mac80211 is converted - * internally to properly keep track of stations on multiple - * virtual interfaces, it will not always know which station to - * return here since a single address might be used by multiple - * logical stations (e.g. consider a station connecting to another - * BSSID on the same AP hardware without disconnecting first). + * NOTE: You may pass NULL for localaddr, but then you will just get + * the first STA that matches the remote address 'addr'. + * We can have multiple STA associated with multiple + * logical stations (e.g. consider a station connecting to another + * BSSID on the same AP hardware without disconnecting first). + * In this case, the result of this method with localaddr NULL + * is not reliable. * - * DO NOT USE THIS FUNCTION. + * DO NOT USE THIS FUNCTION with localaddr NULL if at all possible. */ -struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw, - const u8 *addr); +struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, + const u8 *addr, + const u8 *localaddr); /** * ieee80211_sta_block_awake - block station from waking up @@ -2442,7 +2505,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, * * @vif: &struct ieee80211_vif pointer from the add_interface callback. * - * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTERING and + * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTER and * %IEEE80211_CONF_PS is set, the driver needs to inform whenever the * hardware is not receiving beacons with this function. */ @@ -2453,7 +2516,7 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif); * * @vif: &struct ieee80211_vif pointer from the add_interface callback. 
* - * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTERING, and + * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTER, and * %IEEE80211_CONF_PS and %IEEE80211_HW_CONNECTION_MONITOR are set, the driver * needs to inform if the connection to the AP has been lost. * @@ -2518,6 +2581,34 @@ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, */ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); +/** + * ieee80211_request_smps - request SM PS transition + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @smps_mode: new SM PS mode + * + * This allows the driver to request an SM PS transition in managed + * mode. This is useful when the driver has more information than + * the stack about possible interference, for example by bluetooth. + */ +void ieee80211_request_smps(struct ieee80211_vif *vif, + enum ieee80211_smps_mode smps_mode); + +/** + * ieee80211_key_removed - disable hw acceleration for key + * @key_conf: The key hw acceleration should be disabled for + * + * This allows drivers to indicate that the given key has been + * removed from hardware acceleration, due to a new key that + * was added. Don't use this if the key can continue to be used + * for TX, if the key restriction is on RX only it is permitted + * to keep the key for TX only and not call this function. + * + * Due to locking constraints, it may only be called during + * @set_key. This function must be allowed to sleep, and the + * key it tries to disable may still be used until it returns. 
+ */ +void ieee80211_key_removed(struct ieee80211_key_conf *key_conf); + /* Rate control API */ /** @@ -2681,4 +2772,26 @@ conf_is_ht(struct ieee80211_conf *conf) return conf->channel_type != NL80211_CHAN_NO_HT; } +static inline enum nl80211_iftype +ieee80211_iftype_p2p(enum nl80211_iftype type, bool p2p) +{ + if (p2p) { + switch (type) { + case NL80211_IFTYPE_STATION: + return NL80211_IFTYPE_P2P_CLIENT; + case NL80211_IFTYPE_AP: + return NL80211_IFTYPE_P2P_GO; + default: + break; + } + } + return type; +} + +static inline enum nl80211_iftype +ieee80211_vif_type_p2p(struct ieee80211_vif *vif) +{ + return ieee80211_iftype_p2p(vif->type, vif->p2p); +} + #endif /* MAC80211_H */ diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 242879b6c4df..55590ab16b3e 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -91,26 +91,28 @@ struct neigh_statistics { #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field) struct neighbour { - struct neighbour *next; + struct neighbour __rcu *next; struct neigh_table *tbl; struct neigh_parms *parms; - struct net_device *dev; - unsigned long used; unsigned long confirmed; unsigned long updated; __u8 flags; __u8 nud_state; __u8 type; __u8 dead; + atomic_t refcnt; + struct sk_buff_head arp_queue; + struct timer_list timer; + unsigned long used; atomic_t probes; rwlock_t lock; + seqlock_t ha_lock; unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; struct hh_cache *hh; - atomic_t refcnt; int (*output)(struct sk_buff *skb); - struct sk_buff_head arp_queue; - struct timer_list timer; const struct neigh_ops *ops; + struct rcu_head rcu; + struct net_device *dev; u8 primary_key[0]; }; @@ -138,13 +140,22 @@ struct pneigh_entry { * neighbour table manipulation */ +struct neigh_hash_table { + struct neighbour __rcu **hash_buckets; + unsigned int hash_mask; + __u32 hash_rnd; + struct rcu_head rcu; +}; + struct neigh_table { struct neigh_table *next; int family; int entry_size; int 
key_len; - __u32 (*hash)(const void *pkey, const struct net_device *); + __u32 (*hash)(const void *pkey, + const struct net_device *dev, + __u32 hash_rnd); int (*constructor)(struct neighbour *); int (*pconstructor)(struct pneigh_entry *); void (*pdestructor)(struct pneigh_entry *); @@ -163,11 +174,9 @@ struct neigh_table { atomic_t entries; rwlock_t lock; unsigned long last_rand; - struct kmem_cache *kmem_cachep; + struct kmem_cache *kmem_cachep; struct neigh_statistics __percpu *stats; - struct neighbour **hash_buckets; - unsigned int hash_mask; - __u32 hash_rnd; + struct neigh_hash_table __rcu *nht; struct pneigh_entry **phash_buckets; }; @@ -237,6 +246,7 @@ extern void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_en struct neigh_seq_state { struct seq_net_private p; struct neigh_table *tbl; + struct neigh_hash_table *nht; void *(*neigh_sub_iter)(struct neigh_seq_state *state, struct neighbour *n, loff_t *pos); unsigned int bucket; @@ -293,7 +303,10 @@ static inline void neigh_confirm(struct neighbour *neigh) static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { - neigh->used = jiffies; + unsigned long now = ACCESS_ONCE(jiffies); + + if (neigh->used != now) + neigh->used = now; if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) return __neigh_event_send(neigh, skb); return 0; @@ -364,4 +377,14 @@ struct neighbour_cb { #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) +static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, + const struct net_device *dev) +{ + unsigned int seq; + + do { + seq = read_seqbegin(&n->ha_lock); + memcpy(dst, n->ha, dev->addr_len); + } while (read_seqretry(&n->ha_lock, seq)); +} #endif diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index bd10a7908993..65af9a07cf76 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -41,6 +41,8 @@ struct net { * destroy on demand */ #endif + spinlock_t rules_mod_lock; + 
struct list_head list; /* list of network namespaces */ struct list_head cleanup_list; /* namespaces on death row */ struct list_head exit_list; /* Use only net_mutex */ @@ -52,7 +54,8 @@ struct net { struct ctl_table_set sysctls; #endif - struct net_device *loopback_dev; /* The loopback */ + struct sock *rtnl; /* rtnetlink socket */ + struct sock *genl_sock; struct list_head dev_base_head; struct hlist_head *dev_name_head; @@ -60,11 +63,9 @@ struct net { /* core fib_rules */ struct list_head rules_ops; - spinlock_t rules_mod_lock; - struct sock *rtnl; /* rtnetlink socket */ - struct sock *genl_sock; + struct net_device *loopback_dev; /* The loopback */ struct netns_core core; struct netns_mib mib; struct netns_packet packet; @@ -84,13 +85,15 @@ struct net { struct sock *nfnl; struct sock *nfnl_stash; #endif -#ifdef CONFIG_XFRM - struct netns_xfrm xfrm; -#endif #ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; #endif struct net_generic *gen; + + /* Note : following structs are cache line aligned */ +#ifdef CONFIG_XFRM + struct netns_xfrm xfrm; +#endif }; diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h new file mode 100644 index 000000000000..94dd54d76b48 --- /dev/null +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -0,0 +1,6 @@ +#ifndef _NF_DEFRAG_IPV6_H +#define _NF_DEFRAG_IPV6_H + +extern void nf_defrag_ipv6_enable(void); + +#endif /* _NF_DEFRAG_IPV6_H */ diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index e624dae54fa4..caf17db87dbc 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -75,7 +75,7 @@ struct nf_conntrack_helper; /* nf_conn feature for connections that have a helper */ struct nf_conn_help { /* Helper. 
if any */ - struct nf_conntrack_helper *helper; + struct nf_conntrack_helper __rcu *helper; union nf_conntrack_help help; diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 11e815084fcf..0f8a8c587532 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -67,9 +67,6 @@ struct nf_conntrack_expect_policy { #define NF_CT_EXPECT_CLASS_DEFAULT 0 -#define NF_CT_EXPECT_PERMANENT 0x1 -#define NF_CT_EXPECT_INACTIVE 0x2 - int nf_conntrack_expect_init(struct net *net); void nf_conntrack_expect_fini(struct net *net); @@ -85,9 +82,16 @@ struct nf_conntrack_expect * nf_ct_find_expectation(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple); -void nf_ct_unlink_expect(struct nf_conntrack_expect *exp); +void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, + u32 pid, int report); +static inline void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) +{ + nf_ct_unlink_expect_report(exp, 0, 0); +} + void nf_ct_remove_expectations(struct nf_conn *ct); void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); +void nf_ct_remove_userspace_expectations(void); /* Allocate space for an expectation: this is mandatory before calling nf_ct_expect_related. You will have to call put afterwards. */ diff --git a/include/net/netfilter/nf_nat_protocol.h b/include/net/netfilter/nf_nat_protocol.h index df17bac46bf5..93cc90d28e66 100644 --- a/include/net/netfilter/nf_nat_protocol.h +++ b/include/net/netfilter/nf_nat_protocol.h @@ -45,9 +45,6 @@ struct nf_nat_protocol { extern int nf_nat_protocol_register(const struct nf_nat_protocol *proto); extern void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto); -extern const struct nf_nat_protocol *nf_nat_proto_find_get(u_int8_t protocol); -extern void nf_nat_proto_put(const struct nf_nat_protocol *proto); - /* Built-in protocols. 
*/ extern const struct nf_nat_protocol nf_nat_protocol_tcp; extern const struct nf_nat_protocol nf_nat_protocol_udp; diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h index 208b46f4d6d2..cd85b3bc8327 100644 --- a/include/net/netfilter/nf_tproxy_core.h +++ b/include/net/netfilter/nf_tproxy_core.h @@ -5,15 +5,201 @@ #include <linux/in.h> #include <linux/skbuff.h> #include <net/sock.h> -#include <net/inet_sock.h> +#include <net/inet_hashtables.h> +#include <net/inet6_hashtables.h> #include <net/tcp.h> +#define NFT_LOOKUP_ANY 0 +#define NFT_LOOKUP_LISTENER 1 +#define NFT_LOOKUP_ESTABLISHED 2 + /* look up and get a reference to a matching socket */ -extern struct sock * + + +/* This function is used by the 'TPROXY' target and the 'socket' + * match. The following lookups are supported: + * + * Explicit TProxy target rule + * =========================== + * + * This is used when the user wants to intercept a connection matching + * an explicit iptables rule. In this case the sockets are assumed + * matching in preference order: + * + * - match: if there's a fully established connection matching the + * _packet_ tuple, it is returned, assuming the redirection + * already took place and we process a packet belonging to an + * established connection + * + * - match: if there's a listening socket matching the redirection + * (e.g. on-port & on-ip of the connection), it is returned, + * regardless if it was bound to 0.0.0.0 or an explicit + * address. The reasoning is that if there's an explicit rule, it + * does not really matter if the listener is bound to an interface + * or to 0. The user already stated that he wants redirection + * (since he added the rule). + * + * "socket" match based redirection (no specific rule) + * =================================================== + * + * There are connections with dynamic endpoints (e.g. FTP data + * connection) that the user is unable to add explicit rules + * for. 
These are taken care of by a generic "socket" rule. It is + * assumed that the proxy application is trusted to open such + * connections without explicit iptables rule (except of course the + * generic 'socket' rule). In this case the following sockets are + * matched in preference order: + * + * - match: if there's a fully established connection matching the + * _packet_ tuple + * + * - match: if there's a non-zero bound listener (possibly with a + * non-local address). We don't accept zero-bound listeners, since + * then local services could intercept traffic going through the + * box. + * + * Please note that there's an overlap between what a TPROXY target + * and a socket match will match. Normally if you have both rules the + * "socket" match will be the first one, effectively all packets + * belonging to established connections going through that one. + */ +static inline struct sock * nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, - const struct net_device *in, bool listening); + const struct net_device *in, int lookup_type) +{ + struct sock *sk; + + /* look up socket */ + switch (protocol) { + case IPPROTO_TCP: + switch (lookup_type) { + case NFT_LOOKUP_ANY: + sk = __inet_lookup(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + case NFT_LOOKUP_LISTENER: + sk = inet_lookup_listener(net, &tcp_hashinfo, + daddr, dport, + in->ifindex); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too */ + + break; + case NFT_LOOKUP_ESTABLISHED: + sk = inet_lookup_established(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + default: + WARN_ON(1); + sk = NULL; + break; + } + break; + case IPPROTO_UDP: + sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + if (sk && lookup_type != NFT_LOOKUP_ANY) { + int connected =
(sk->sk_state == TCP_ESTABLISHED); + int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too */ + if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) || + (lookup_type == NFT_LOOKUP_LISTENER && connected)) { + sock_put(sk); + sk = NULL; + } + } + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n", + protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk); + + return sk; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static inline struct sock * +nf_tproxy_get_sock_v6(struct net *net, const u8 protocol, + const struct in6_addr *saddr, const struct in6_addr *daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, int lookup_type) +{ + struct sock *sk; + + /* look up socket */ + switch (protocol) { + case IPPROTO_TCP: + switch (lookup_type) { + case NFT_LOOKUP_ANY: + sk = inet6_lookup(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + case NFT_LOOKUP_LISTENER: + sk = inet6_lookup_listener(net, &tcp_hashinfo, + daddr, ntohs(dport), + in->ifindex); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too */ + + break; + case NFT_LOOKUP_ESTABLISHED: + sk = __inet6_lookup_established(net, &tcp_hashinfo, + saddr, sport, daddr, ntohs(dport), + in->ifindex); + break; + default: + WARN_ON(1); + sk = NULL; + break; + } + break; + case IPPROTO_UDP: + sk = udp6_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + if (sk && lookup_type != NFT_LOOKUP_ANY) { + int connected = (sk->sk_state == TCP_ESTABLISHED); + int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those 
are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too */ + if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) || + (lookup_type == NFT_LOOKUP_LISTENER && connected)) { + sock_put(sk); + sk = NULL; + } + } + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n", + protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk); + + return sk; +} +#endif static inline void nf_tproxy_put_sock(struct sock *sk) diff --git a/include/net/netfilter/xt_log.h b/include/net/netfilter/xt_log.h new file mode 100644 index 000000000000..0dfb34a5b53c --- /dev/null +++ b/include/net/netfilter/xt_log.h @@ -0,0 +1,54 @@ +#define S_SIZE (1024 - (sizeof(unsigned int) + 1)) + +struct sbuff { + unsigned int count; + char buf[S_SIZE + 1]; +}; +static struct sbuff emergency, *emergency_ptr = &emergency; + +static int sb_add(struct sbuff *m, const char *f, ...) +{ + va_list args; + int len; + + if (likely(m->count < S_SIZE)) { + va_start(args, f); + len = vsnprintf(m->buf + m->count, S_SIZE - m->count, f, args); + va_end(args); + if (likely(m->count + len < S_SIZE)) { + m->count += len; + return 0; + } + } + m->count = S_SIZE; + printk_once(KERN_ERR KBUILD_MODNAME " please increase S_SIZE\n"); + return -1; +} + +static struct sbuff *sb_open(void) +{ + struct sbuff *m = kmalloc(sizeof(*m), GFP_ATOMIC); + + if (unlikely(!m)) { + local_bh_disable(); + do { + m = xchg(&emergency_ptr, NULL); + } while (!m); + } + m->count = 0; + return m; +} + +static void sb_close(struct sbuff *m) +{ + m->buf[m->count] = 0; + printk("%s\n", m->buf); + + if (likely(m != &emergency)) + kfree(m); + else { + xchg(&emergency_ptr, m); + local_bh_enable(); + } +} + diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 74f119a2829a..748f91f87cd5 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -43,10 +43,6 @@ struct netns_xfrm { unsigned 
int policy_count[XFRM_POLICY_MAX * 2]; struct work_struct policy_hash_work; - struct dst_ops xfrm4_dst_ops; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct dst_ops xfrm6_dst_ops; -#endif struct sock *nlsk; struct sock *nlsk_stash; @@ -58,6 +54,11 @@ struct netns_xfrm { #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_hdr; #endif + + struct dst_ops xfrm4_dst_ops; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct dst_ops xfrm6_dst_ops; +#endif }; #endif diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index 35672b1cf44a..b60b28c99e87 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -45,6 +45,10 @@ struct pep_sock { u8 tx_fc; /* TX flow control */ u8 init_enable; /* auto-enable at creation */ u8 aligned; +#ifdef CONFIG_PHONET_PIPECTRLR + u8 pipe_state; + struct sockaddr_pn remote_pep; +#endif }; static inline struct pep_sock *pep_sk(struct sock *sk) @@ -77,6 +81,11 @@ static inline struct pnpipehdr *pnp_hdr(struct sk_buff *skb) #define MAX_PNPIPE_HEADER (MAX_PHONET_HEADER + 4) enum { + PNS_PIPE_CREATE_REQ = 0x00, + PNS_PIPE_CREATE_RESP, + PNS_PIPE_REMOVE_REQ, + PNS_PIPE_REMOVE_RESP, + PNS_PIPE_DATA = 0x20, PNS_PIPE_ALIGNED_DATA, @@ -160,4 +169,21 @@ enum { PEP_IND_READY, }; +#ifdef CONFIG_PHONET_PIPECTRLR +#define PNS_PEP_CONNECT_UTID 0x02 +#define PNS_PIPE_CREATED_IND_UTID 0x04 +#define PNS_PIPE_ENABLE_UTID 0x0A +#define PNS_PIPE_ENABLED_IND_UTID 0x0C +#define PNS_PIPE_DISABLE_UTID 0x0F +#define PNS_PIPE_DISABLED_IND_UTID 0x11 +#define PNS_PEP_DISCONNECT_UTID 0x06 + +/* Used for tracking state of a pipe */ +enum { + PIPE_IDLE, + PIPE_DISABLED, + PIPE_ENABLED, +}; +#endif /* CONFIG_PHONET_PIPECTRLR */ + #endif diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index 7b114079a51b..d5df797f9540 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -54,6 +54,11 @@ void pn_sock_hash(struct sock *sk); void pn_sock_unhash(struct sock *sk); int 
pn_sock_get_port(struct sock *sk, unsigned short sport); +struct sock *pn_find_sock_by_res(struct net *net, u8 res); +int pn_sock_bind_res(struct sock *sock, u8 res); +int pn_sock_unbind_res(struct sock *sk, u8 res); +void pn_sock_unbind_all_res(struct sock *sk); + int pn_skb_send(struct sock *sk, struct sk_buff *skb, const struct sockaddr_pn *target); diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 2d16783d5e20..13649eb57413 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -57,5 +57,6 @@ struct net_device *phonet_route_output(struct net *net, u8 daddr); #define PN_NO_ADDR 0xff extern const struct file_operations pn_sock_seq_fops; +extern const struct file_operations pn_res_seq_fops; #endif diff --git a/include/net/raw.h b/include/net/raw.h index 43c57502659b..42ce6fe7a2d5 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -45,7 +45,10 @@ struct raw_iter_state { struct raw_hashinfo *h; }; -#define raw_seq_private(seq) ((struct raw_iter_state *)(seq)->private) +static inline struct raw_iter_state *raw_seq_private(struct seq_file *seq) +{ + return seq->private; +} void *raw_seq_start(struct seq_file *seq, loff_t *pos); void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos); void raw_seq_stop(struct seq_file *seq, void *v); diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index af60fd050844..e013c68bfb00 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -79,7 +79,6 @@ struct rtnl_link_ops { extern int __rtnl_link_register(struct rtnl_link_ops *ops); extern void __rtnl_link_unregister(struct rtnl_link_ops *ops); -extern void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops); extern int rtnl_link_register(struct rtnl_link_ops *ops); extern void rtnl_link_unregister(struct rtnl_link_ops *ops); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 3c8728aaab4e..ea1f8a83160d 100644 --- a/include/net/sch_generic.h +++ 
b/include/net/sch_generic.h @@ -328,8 +328,7 @@ extern void qdisc_destroy(struct Qdisc *qdisc); extern void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n); extern struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, struct Qdisc_ops *ops); -extern struct Qdisc *qdisc_create_dflt(struct net_device *dev, - struct netdev_queue *dev_queue, +extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, struct Qdisc_ops *ops, u32 parentid); extern void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab); @@ -601,7 +600,7 @@ static inline u32 qdisc_l2t(struct qdisc_rate_table* rtab, unsigned int pktlen) slot = 0; slot >>= rtab->rate.cell_log; if (slot > 255) - return (rtab->data[255]*(slot >> 8) + rtab->data[slot & 0xFF]); + return rtab->data[255]*(slot >> 8) + rtab->data[slot & 0xFF]; return rtab->data[slot]; } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 65946bc43d00..505845ddb0be 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -275,24 +275,35 @@ struct sctp_mib { /* Print debugging messages. */ #if SCTP_DEBUG extern int sctp_debug_flag; -#define SCTP_DEBUG_PRINTK(whatever...) \ - ((void) (sctp_debug_flag && printk(KERN_DEBUG whatever))) -#define SCTP_DEBUG_PRINTK_IPADDR(lead, trail, leadparm, saddr, otherparms...) \ - if (sctp_debug_flag) { \ - if (saddr->sa.sa_family == AF_INET6) { \ - printk(KERN_DEBUG \ - lead "%pI6" trail, \ - leadparm, \ - &saddr->v6.sin6_addr, \ - otherparms); \ - } else { \ - printk(KERN_DEBUG \ - lead "%pI4" trail, \ - leadparm, \ - &saddr->v4.sin_addr.s_addr, \ - otherparms); \ - } \ - } +#define SCTP_DEBUG_PRINTK(fmt, args...) \ +do { \ + if (sctp_debug_flag) \ + printk(KERN_DEBUG pr_fmt(fmt), ##args); \ +} while (0) +#define SCTP_DEBUG_PRINTK_CONT(fmt, args...) \ +do { \ + if (sctp_debug_flag) \ + pr_cont(fmt, ##args); \ +} while (0) +#define SCTP_DEBUG_PRINTK_IPADDR(fmt_lead, fmt_trail, \ + args_lead, saddr, args_trail...) 
\ +do { \ + if (sctp_debug_flag) { \ + if (saddr->sa.sa_family == AF_INET6) { \ + printk(KERN_DEBUG \ + pr_fmt(fmt_lead "%pI6" fmt_trail), \ + args_lead, \ + &saddr->v6.sin6_addr, \ + args_trail); \ + } else { \ + printk(KERN_DEBUG \ + pr_fmt(fmt_lead "%pI4" fmt_trail), \ + args_lead, \ + &saddr->v4.sin_addr.s_addr, \ + args_trail); \ + } \ + } \ +} while (0) #define SCTP_ENABLE_DEBUG { sctp_debug_flag = 1; } #define SCTP_DISABLE_DEBUG { sctp_debug_flag = 0; } @@ -306,6 +317,7 @@ extern int sctp_debug_flag; #else /* SCTP_DEBUG */ #define SCTP_DEBUG_PRINTK(whatever...) +#define SCTP_DEBUG_PRINTK_CONT(fmt, args...) #define SCTP_DEBUG_PRINTK_IPADDR(whatever...) #define SCTP_ENABLE_DEBUG #define SCTP_DISABLE_DEBUG @@ -393,7 +405,7 @@ static inline void sctp_v6_del_protocol(void) { return; } /* Map an association to an assoc_id. */ static inline sctp_assoc_t sctp_assoc2id(const struct sctp_association *asoc) { - return (asoc?asoc->assoc_id:0); + return asoc ? asoc->assoc_id : 0; } /* Look up the association by its id. */ @@ -461,7 +473,7 @@ static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) /* Tests if the list has one and only one entry. */ static inline int sctp_list_single_entry(struct list_head *head) { - return ((head->next != head) && (head->next == head->prev)); + return (head->next != head) && (head->next == head->prev); } /* Generate a random jitter in the range of -50% ~ +50% of input RTO. */ @@ -619,13 +631,13 @@ static inline int sctp_sanity_check(void) /* This is the hash function for the SCTP port hash table. */ static inline int sctp_phashfn(__u16 lport) { - return (lport & (sctp_port_hashsize - 1)); + return lport & (sctp_port_hashsize - 1); } /* This is the hash function for the endpoint hash table. */ static inline int sctp_ep_hashfn(__u16 lport) { - return (lport & (sctp_ep_hashsize - 1)); + return lport & (sctp_ep_hashsize - 1); } /* This is the hash function for the association hash table. 
*/ @@ -633,7 +645,7 @@ static inline int sctp_assoc_hashfn(__u16 lport, __u16 rport) { int h = (lport << 16) + rport; h ^= h>>8; - return (h & (sctp_assoc_hashsize - 1)); + return h & (sctp_assoc_hashsize - 1); } /* This is the hash function for the association hash table. This is @@ -644,7 +656,7 @@ static inline int sctp_vtag_hashfn(__u16 lport, __u16 rport, __u32 vtag) { int h = (lport << 16) + rport; h ^= vtag; - return (h & (sctp_assoc_hashsize-1)); + return h & (sctp_assoc_hashsize - 1); } #define sctp_for_each_hentry(epb, node, head) \ diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 4088c89a9055..9352d12f02de 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -345,12 +345,12 @@ enum { static inline int TSN_lt(__u32 s, __u32 t) { - return (((s) - (t)) & TSN_SIGN_BIT); + return ((s) - (t)) & TSN_SIGN_BIT; } static inline int TSN_lte(__u32 s, __u32 t) { - return (((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT)); + return ((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT); } /* Compare two SSNs */ @@ -369,12 +369,12 @@ enum { static inline int SSN_lt(__u16 s, __u16 t) { - return (((s) - (t)) & SSN_SIGN_BIT); + return ((s) - (t)) & SSN_SIGN_BIT; } static inline int SSN_lte(__u16 s, __u16 t) { - return (((s) == (t)) || (((s) - (t)) & SSN_SIGN_BIT)); + return ((s) == (t)) || (((s) - (t)) & SSN_SIGN_BIT); } /* @@ -388,7 +388,7 @@ enum { static inline int ADDIP_SERIAL_gte(__u16 s, __u16 t) { - return (((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT)); + return ((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT); } /* Check VTAG of the packet matches the sender's own tag. 
*/ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f9e7473613bd..69fef4fb79c0 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -847,7 +847,7 @@ void sctp_packet_free(struct sctp_packet *); static inline int sctp_packet_empty(struct sctp_packet *packet) { - return (packet->size == packet->overhead); + return packet->size == packet->overhead; } /* This represents a remote transport address. diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h index 4aabc5a96cf6..e7728bc14ccf 100644 --- a/include/net/sctp/tsnmap.h +++ b/include/net/sctp/tsnmap.h @@ -157,7 +157,7 @@ __u16 sctp_tsnmap_pending(struct sctp_tsnmap *map); /* Is there a gap in the TSN map? */ static inline int sctp_tsnmap_has_gap(const struct sctp_tsnmap *map) { - return (map->cumulative_tsn_ack_point != map->max_tsn_seen); + return map->cumulative_tsn_ack_point != map->max_tsn_seen; } /* Mark a duplicate TSN. Note: limit the storage of duplicate TSN diff --git a/include/net/sock.h b/include/net/sock.h index adab9dc58183..73a4f9702a65 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1558,7 +1558,11 @@ static inline void sk_wake_async(struct sock *sk, int how, int band) } #define SOCK_MIN_SNDBUF 2048 -#define SOCK_MIN_RCVBUF 256 +/* + * Since sk_rmem_alloc sums skb->truesize, even a small frame might need + * sizeof(sk_buff) + MTU + padding, unless net driver perform copybreak + */ +#define SOCK_MIN_RCVBUF (2048 + sizeof(struct sk_buff)) static inline void sk_stream_moderate_sndbuf(struct sock *sk) { @@ -1670,17 +1674,13 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, /** * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped - * @msg: outgoing packet * @sk: socket sending this packet - * @shtx: filled with instructions for time stamping + * @tx_flags: filled with instructions for time stamping * * Currently only depends on SOCK_TIMESTAMPING* flags. 
Returns error code if * parameters are invalid. */ -extern int sock_tx_timestamp(struct msghdr *msg, - struct sock *sk, - union skb_shared_tx *shtx); - +extern int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); /** * sk_eat_skb - Release a skb if it is no longer needed diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h new file mode 100644 index 000000000000..9e8710be7a04 --- /dev/null +++ b/include/net/tc_act/tc_csum.h @@ -0,0 +1,15 @@ +#ifndef __NET_TC_CSUM_H +#define __NET_TC_CSUM_H + +#include <linux/types.h> +#include <net/act_api.h> + +struct tcf_csum { + struct tcf_common common; + + u32 update_flags; +}; +#define to_tcf_csum(pc) \ + container_of(pc,struct tcf_csum,common) + +#endif /* __NET_TC_CSUM_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 3e4b33e36602..4fee0424af7e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -346,8 +346,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk, } } -extern void tcp_enter_quickack_mode(struct sock *sk); - #define TCP_ECN_OK 1 #define TCP_ECN_QUEUE_CWR 2 #define TCP_ECN_DEMAND_CWR 4 @@ -803,6 +801,15 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* Use define here intentionally to get WARN_ON location shown at the caller */ #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out) +/* + * Convert RFC 3390 larger initial window into an equivalent number of packets. + * This is based on the numbers specified in RFC 5681, 3.1. + */ +static inline u32 rfc3390_bytes_to_packets(const u32 smss) +{ + return smss <= 1095 ? 4 : (smss > 2190 ? 
2 : 3); +} + extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); diff --git a/include/net/tipc/tipc.h b/include/net/tipc/tipc.h index 15af6dca0b49..1e0645e1eed2 100644 --- a/include/net/tipc/tipc.h +++ b/include/net/tipc/tipc.h @@ -50,8 +50,6 @@ * TIPC operating mode routines */ -u32 tipc_get_addr(void); - #define TIPC_NOT_RUNNING 0 #define TIPC_NODE_MODE 1 #define TIPC_NET_MODE 2 @@ -62,8 +60,6 @@ int tipc_attach(unsigned int *userref, tipc_mode_event, void *usr_handle); void tipc_detach(unsigned int userref); -int tipc_get_mode(void); - /* * TIPC port manipulation routines */ @@ -153,12 +149,6 @@ int tipc_disconnect(u32 portref); int tipc_shutdown(u32 ref); -int tipc_isconnected(u32 portref, int *isconnected); - -int tipc_peer(u32 portref, struct tipc_portid *peer); - -int tipc_ref_valid(u32 portref); - /* * TIPC messaging routines */ @@ -170,38 +160,12 @@ int tipc_send(u32 portref, unsigned int num_sect, struct iovec const *msg_sect); -int tipc_send_buf(u32 portref, - struct sk_buff *buf, - unsigned int dsz); - int tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain, unsigned int num_sect, struct iovec const *msg_sect); -int tipc_send_buf2name(u32 portref, - struct tipc_name const *name, - u32 domain, - struct sk_buff *buf, - unsigned int dsz); - -int tipc_forward2name(u32 portref, - struct tipc_name const *name, - u32 domain, - unsigned int section_count, - struct iovec const *msg_sect, - struct tipc_portid const *origin, - unsigned int importance); - -int tipc_forward_buf2name(u32 portref, - struct tipc_name const *name, - u32 domain, - struct sk_buff *buf, - unsigned int dsz, - struct tipc_portid const *orig, - unsigned int importance); - int tipc_send2port(u32 portref, struct tipc_portid const *dest, unsigned int num_sect, @@ -212,46 +176,11 @@ int tipc_send_buf2port(u32 portref, struct sk_buff *buf, unsigned int dsz); -int tipc_forward2port(u32 portref, 
- struct tipc_portid const *dest, - unsigned int num_sect, - struct iovec const *msg_sect, - struct tipc_portid const *origin, - unsigned int importance); - -int tipc_forward_buf2port(u32 portref, - struct tipc_portid const *dest, - struct sk_buff *buf, - unsigned int dsz, - struct tipc_portid const *orig, - unsigned int importance); - int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, u32 domain, /* currently unused */ unsigned int section_count, struct iovec const *msg); - -#if 0 -int tipc_multicast_buf(u32 portref, - struct tipc_name_seq const *seq, - u32 domain, - void *buf, - unsigned int size); -#endif - -/* - * TIPC subscription routines - */ - -int tipc_ispublished(struct tipc_name const *name); - -/* - * Get number of available nodes within specified domain (excluding own node) - */ - -unsigned int tipc_available_nodes(const u32 domain); - #endif #endif diff --git a/include/net/tipc/tipc_msg.h b/include/net/tipc/tipc_msg.h index 2e159a812f83..ffe50b4e7b93 100644 --- a/include/net/tipc/tipc_msg.h +++ b/include/net/tipc/tipc_msg.h @@ -107,7 +107,7 @@ static inline u32 msg_hdr_sz(struct tipc_msg *m) static inline int msg_short(struct tipc_msg *m) { - return (msg_hdr_sz(m) == 24); + return msg_hdr_sz(m) == 24; } static inline u32 msg_size(struct tipc_msg *m) @@ -117,7 +117,7 @@ static inline u32 msg_size(struct tipc_msg *m) static inline u32 msg_data_sz(struct tipc_msg *m) { - return (msg_size(m) - msg_hdr_sz(m)); + return msg_size(m) - msg_hdr_sz(m); } static inline unchar *msg_data(struct tipc_msg *m) @@ -132,17 +132,17 @@ static inline u32 msg_type(struct tipc_msg *m) static inline u32 msg_named(struct tipc_msg *m) { - return (msg_type(m) == TIPC_NAMED_MSG); + return msg_type(m) == TIPC_NAMED_MSG; } static inline u32 msg_mcast(struct tipc_msg *m) { - return (msg_type(m) == TIPC_MCAST_MSG); + return msg_type(m) == TIPC_MCAST_MSG; } static inline u32 msg_connected(struct tipc_msg *m) { - return (msg_type(m) == TIPC_CONN_MSG); + return 
msg_type(m) == TIPC_CONN_MSG; } static inline u32 msg_errcode(struct tipc_msg *m) diff --git a/include/net/tipc/tipc_port.h b/include/net/tipc/tipc_port.h index c54917cbfa48..1893aaf49426 100644 --- a/include/net/tipc/tipc_port.h +++ b/include/net/tipc/tipc_port.h @@ -88,8 +88,6 @@ void tipc_acknowledge(u32 port_ref,u32 ack); struct tipc_port *tipc_get_port(const u32 ref); -void *tipc_get_handle(const u32 ref); - /* * The following routines require that the port be locked on entry */ diff --git a/include/net/udp.h b/include/net/udp.h index a184d3496b13..200b82848c9a 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -183,6 +183,9 @@ extern int udp_lib_setsockopt(struct sock *sk, int level, int optname, extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); +extern struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, __be16 dport, + int dif); /* * SNMP statistics for UDP and UDP-Lite diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4f53532d4c2f..f28d7c9b9f8d 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1419,7 +1419,6 @@ extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family); extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family); extern __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr); -extern void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr); extern __be32 xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr); extern int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb); @@ -1466,8 +1465,6 @@ struct xfrm_state *xfrm_find_acq(struct net *net, struct xfrm_mark *mark, xfrm_address_t *saddr, int create, unsigned short family); extern int 
xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); -extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, - struct flowi *fl, int family, int strict); #ifdef CONFIG_XFRM_MIGRATE extern int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h deleted file mode 100644 index 68d8bde7e8d6..000000000000 --- a/include/pcmcia/cs.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * cs.h - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * The initial developer of the original code is David A. Hinds - * <dahinds@users.sourceforge.net>. Portions created by David A. Hinds - * are Copyright (C) 1999 David A. Hinds. All Rights Reserved. - * - * (C) 1999 David A. Hinds - */ - -#ifndef _LINUX_CS_H -#define _LINUX_CS_H - -#ifdef __KERNEL__ -#include <linux/interrupt.h> -#endif - -/* ModifyConfiguration */ -typedef struct modconf_t { - u_int Attributes; - u_int Vcc, Vpp1, Vpp2; -} modconf_t; - -/* Attributes for ModifyConfiguration */ -#define CONF_IRQ_CHANGE_VALID 0x0100 -#define CONF_VCC_CHANGE_VALID 0x0200 -#define CONF_VPP1_CHANGE_VALID 0x0400 -#define CONF_VPP2_CHANGE_VALID 0x0800 -#define CONF_IO_CHANGE_WIDTH 0x1000 - -/* For RequestConfiguration */ -typedef struct config_req_t { - u_int Attributes; - u_int Vpp; /* both Vpp1 and Vpp2 */ - u_int IntType; - u_int ConfigBase; - u_char Status, Pin, Copy, ExtStatus; - u_char ConfigIndex; - u_int Present; -} config_req_t; - -/* Attributes for RequestConfiguration */ -#define CONF_ENABLE_IRQ 0x01 -#define CONF_ENABLE_DMA 0x02 -#define CONF_ENABLE_SPKR 0x04 -#define CONF_ENABLE_PULSE_IRQ 0x08 -#define CONF_VALID_CLIENT 0x100 - -/* IntType field */ -#define INT_MEMORY 0x01 -#define INT_MEMORY_AND_IO 0x02 -#define INT_CARDBUS 0x04 -#define INT_ZOOMED_VIDEO 0x08 - -/* Configuration registers present */ 
-#define PRESENT_OPTION 0x001 -#define PRESENT_STATUS 0x002 -#define PRESENT_PIN_REPLACE 0x004 -#define PRESENT_COPY 0x008 -#define PRESENT_EXT_STATUS 0x010 -#define PRESENT_IOBASE_0 0x020 -#define PRESENT_IOBASE_1 0x040 -#define PRESENT_IOBASE_2 0x080 -#define PRESENT_IOBASE_3 0x100 -#define PRESENT_IOSIZE 0x200 - -/* For RequestWindow */ -typedef struct win_req_t { - u_int Attributes; - u_long Base; - u_int Size; - u_int AccessSpeed; -} win_req_t; - -/* Attributes for RequestWindow */ -#define WIN_MEMORY_TYPE_CM 0x00 /* default */ -#define WIN_MEMORY_TYPE_AM 0x20 /* MAP_ATTRIB */ -#define WIN_DATA_WIDTH_8 0x00 /* default */ -#define WIN_DATA_WIDTH_16 0x02 /* MAP_16BIT */ -#define WIN_ENABLE 0x01 /* MAP_ACTIVE */ -#define WIN_USE_WAIT 0x40 /* MAP_USE_WAIT */ - -#define WIN_FLAGS_MAP 0x63 /* MAP_ATTRIB | MAP_16BIT | MAP_ACTIVE | - MAP_USE_WAIT */ -#define WIN_FLAGS_REQ 0x1c /* mapping to socket->win[i]: - 0x04 -> 0 - 0x08 -> 1 - 0x0c -> 2 - 0x10 -> 3 */ - -#endif /* _LINUX_CS_H */ diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index 70c58ed2278c..8479b66c067b 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -24,9 +24,11 @@ #ifdef __KERNEL__ #include <linux/device.h> +#include <linux/interrupt.h> #include <pcmcia/ss.h> #include <asm/atomic.h> + /* * PCMCIA device drivers (16-bit cards only; 32-bit cards require CardBus * a.k.a. PCI drivers @@ -36,8 +38,6 @@ struct pcmcia_device; struct config_t; struct net_device; -typedef struct resource *window_handle_t; - /* dynamic device IDs for PCMCIA device drivers. See * Documentation/pcmcia/driver.txt for details. 
*/ @@ -47,6 +47,8 @@ struct pcmcia_dynids { }; struct pcmcia_driver { + const char *name; + int (*probe) (struct pcmcia_device *dev); void (*remove) (struct pcmcia_device *dev); @@ -90,15 +92,17 @@ struct pcmcia_device { struct list_head socket_device_list; - /* deprecated, will be cleaned up soon */ - config_req_t conf; - window_handle_t win; - /* device setup */ unsigned int irq; struct resource *resource[PCMCIA_NUM_RESOURCES]; + resource_size_t card_addr; /* for the 1st IOMEM resource */ + unsigned int vpp; - unsigned int io_lines; /* number of I/O lines */ + unsigned int config_flags; /* CONF_ENABLE_ flags below */ + unsigned int config_base; + unsigned int config_index; + unsigned int config_regs; /* PRESENT_ flags below */ + unsigned int io_lines; /* number of I/O lines */ /* Is the device suspended? */ u16 suspended:1; @@ -174,9 +178,6 @@ int pcmcia_parse_tuple(tuple_t *tuple, cisparse_t *parse); /* loop CIS entries for valid configuration */ int pcmcia_loop_config(struct pcmcia_device *p_dev, int (*conf_check) (struct pcmcia_device *p_dev, - cistpl_cftable_entry_t *cf, - cistpl_cftable_entry_t *dflt, - unsigned int vcc, void *priv_data), void *priv_data); @@ -206,16 +207,17 @@ pcmcia_request_exclusive_irq(struct pcmcia_device *p_dev, int __must_check pcmcia_request_irq(struct pcmcia_device *p_dev, irq_handler_t handler); -int pcmcia_request_configuration(struct pcmcia_device *p_dev, - config_req_t *req); +int pcmcia_enable_device(struct pcmcia_device *p_dev); -int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, - window_handle_t *wh); -int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win); -int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t win, +int pcmcia_request_window(struct pcmcia_device *p_dev, struct resource *res, + unsigned int speed); +int pcmcia_release_window(struct pcmcia_device *p_dev, struct resource *res); +int pcmcia_map_mem_page(struct pcmcia_device *p_dev, struct resource *res, 
unsigned int offset); -int pcmcia_modify_configuration(struct pcmcia_device *p_dev, modconf_t *mod); +int pcmcia_fixup_vpp(struct pcmcia_device *p_dev, unsigned char new_vpp); +int pcmcia_fixup_iowidth(struct pcmcia_device *p_dev); + void pcmcia_disable_device(struct pcmcia_device *p_dev); /* IO ports */ @@ -224,15 +226,48 @@ void pcmcia_disable_device(struct pcmcia_device *p_dev); #define IO_DATA_PATH_WIDTH_16 0x08 #define IO_DATA_PATH_WIDTH_AUTO 0x10 -/* convert flag found in cfgtable to data path width parameter */ -static inline int pcmcia_io_cfg_data_width(unsigned int flags) -{ - if (!(flags & CISTPL_IO_8BIT)) - return IO_DATA_PATH_WIDTH_16; - if (!(flags & CISTPL_IO_16BIT)) - return IO_DATA_PATH_WIDTH_8; - return IO_DATA_PATH_WIDTH_AUTO; -} +/* IO memory */ +#define WIN_MEMORY_TYPE_CM 0x00 /* default */ +#define WIN_MEMORY_TYPE_AM 0x20 /* MAP_ATTRIB */ +#define WIN_DATA_WIDTH_8 0x00 /* default */ +#define WIN_DATA_WIDTH_16 0x02 /* MAP_16BIT */ +#define WIN_ENABLE 0x01 /* MAP_ACTIVE */ +#define WIN_USE_WAIT 0x40 /* MAP_USE_WAIT */ + +#define WIN_FLAGS_MAP 0x63 /* MAP_ATTRIB | MAP_16BIT | MAP_ACTIVE | + MAP_USE_WAIT */ +#define WIN_FLAGS_REQ 0x1c /* mapping to socket->win[i]: + 0x04 -> 0 + 0x08 -> 1 + 0x0c -> 2 + 0x10 -> 3 */ + +/* config_reg{ister}s present for this PCMCIA device */ +#define PRESENT_OPTION 0x001 +#define PRESENT_STATUS 0x002 +#define PRESENT_PIN_REPLACE 0x004 +#define PRESENT_COPY 0x008 +#define PRESENT_EXT_STATUS 0x010 +#define PRESENT_IOBASE_0 0x020 +#define PRESENT_IOBASE_1 0x040 +#define PRESENT_IOBASE_2 0x080 +#define PRESENT_IOBASE_3 0x100 +#define PRESENT_IOSIZE 0x200 + +/* flags to be passed to pcmcia_enable_device() */ +#define CONF_ENABLE_IRQ 0x0001 +#define CONF_ENABLE_SPKR 0x0002 +#define CONF_ENABLE_PULSE_IRQ 0x0004 +#define CONF_ENABLE_ESR 0x0008 +#define CONF_ENABLE_IOCARD 0x0010 /* auto-enabled if IO resources or IRQ + * (CONF_ENABLE_IRQ) in use */ + +/* flags used by pcmcia_loop_config() autoconfiguration */ +#define 
CONF_AUTO_CHECK_VCC 0x0100 /* check for matching Vcc? */ +#define CONF_AUTO_SET_VPP 0x0200 /* set Vpp? */ +#define CONF_AUTO_AUDIO 0x0400 /* enable audio line? */ +#define CONF_AUTO_SET_IO 0x0800 /* set ->resource[0,1] */ +#define CONF_AUTO_SET_IOMEM 0x1000 /* set ->resource[2] */ #endif /* __KERNEL__ */ diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index 626b63c33d9e..731cde010f42 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -19,7 +19,6 @@ #include <linux/sched.h> /* task_struct, completion */ #include <linux/mutex.h> -#include <pcmcia/cs.h> #ifdef CONFIG_CARDBUS #include <linux/pci.h> #endif diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index d06e13be717b..3dec1949f69c 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -205,6 +205,7 @@ struct domain_device { }; void *lldd_dev; + int gone; }; struct sas_discovery_event { diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index 8fcb6e0e9e72..216af8538cc9 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -32,6 +32,12 @@ struct scsi_cmnd; #endif /* + * DIX-capable adapters effectively support infinite chaining for the + * protection information scatterlist + */ +#define SCSI_MAX_PROT_SG_SEGMENTS 0xFFFF + +/* * Special value for scanning to specify scanning or rescanning of all * possible channels, (target) ids, or luns on a given shost. 
*/ @@ -67,6 +73,7 @@ struct scsi_cmnd; #define SEND_DIAGNOSTIC 0x1d #define ALLOW_MEDIUM_REMOVAL 0x1e +#define READ_FORMAT_CAPACITIES 0x23 #define SET_WINDOW 0x24 #define READ_CAPACITY 0x25 #define READ_10 0x28 @@ -96,6 +103,7 @@ struct scsi_cmnd; #define WRITE_SAME 0x41 #define UNMAP 0x42 #define READ_TOC 0x43 +#define READ_HEADER 0x44 #define LOG_SELECT 0x4c #define LOG_SENSE 0x4d #define XDWRITEREAD_10 0x53 diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 50cb34ffef11..85867dcde335 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -148,6 +148,8 @@ struct scsi_device { unsigned retry_hwerror:1; /* Retry HARDWARE_ERROR */ unsigned last_sector_bug:1; /* do not use multisector accesses on SD_LAST_BUGGY_SECTORS */ + unsigned no_read_disc_info:1; /* Avoid READ_DISC_INFO cmds */ + unsigned no_read_capacity_16:1; /* Avoid READ_CAPACITY_16 cmds */ unsigned is_visible:1; /* is the device visible in sysfs */ DECLARE_BITMAP(supported_events, SDEV_EVT_MAXBITS); /* supported events */ diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index b7bdecb7b76e..d0a6a845f204 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -388,6 +388,7 @@ struct scsi_host_template { * of scatter-gather. */ unsigned short sg_tablesize; + unsigned short sg_prot_tablesize; /* * Set this if the host adapter has limitations beside segment count. 
@@ -599,6 +600,7 @@ struct Scsi_Host { int can_queue; short cmd_per_lun; short unsigned int sg_tablesize; + short unsigned int sg_prot_tablesize; short unsigned int max_sectors; unsigned long dma_boundary; /* @@ -823,6 +825,11 @@ static inline unsigned int scsi_host_get_prot(struct Scsi_Host *shost) return shost->prot_capabilities; } +static inline int scsi_host_prot_dma(struct Scsi_Host *shost) +{ + return shost->prot_capabilities >= SHOST_DIX_TYPE0_PROTECTION; +} + static inline unsigned int scsi_host_dif_capable(struct Scsi_Host *shost, unsigned int target_type) { static unsigned char cap[] = { 0, diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h index 17231385cb37..d6e7994aa634 100644 --- a/include/scsi/scsi_tcq.h +++ b/include/scsi/scsi_tcq.h @@ -97,13 +97,9 @@ static inline void scsi_deactivate_tcq(struct scsi_device *sdev, int depth) static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg) { struct request *req = cmd->request; - struct scsi_device *sdev = cmd->device; if (blk_rq_tagged(req)) { - if (sdev->ordered_tags && req->cmd_flags & REQ_HARDBARRIER) - *msg++ = MSG_ORDERED_TAG; - else - *msg++ = MSG_SIMPLE_TAG; + *msg++ = MSG_SIMPLE_TAG; *msg++ = req->tag; return 2; } diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index 87d81b3ce564..59816fe31e68 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -496,6 +496,7 @@ struct fc_host_attrs { u64 fabric_name; char symbolic_name[FC_SYMBOLIC_NAME_SIZE]; char system_hostname[FC_SYMBOLIC_NAME_SIZE]; + u32 dev_loss_tmo; /* Private (Transport-managed) Attributes */ enum fc_tgtid_binding_type tgtid_bind_type; @@ -580,6 +581,8 @@ struct fc_host_attrs { (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q_name) #define fc_host_devloss_work_q(x) \ (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q) +#define fc_host_dev_loss_tmo(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->dev_loss_tmo) struct 
fc_bsg_buffer { diff --git a/include/sound/core.h b/include/sound/core.h index 89e0ac17f44a..df26ebbfa9c6 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -133,9 +133,7 @@ struct snd_card { int free_on_last_close; /* free in context of file_release */ wait_queue_head_t shutdown_sleep; struct device *dev; /* device assigned to this card */ -#ifndef CONFIG_SYSFS_DEPRECATED struct device *card_dev; /* cardX object for sysfs */ -#endif #ifdef CONFIG_PM unsigned int power_state; /* power state */ @@ -196,11 +194,7 @@ struct snd_minor { /* return a device pointer linked to each sound device as a parent */ static inline struct device *snd_card_get_device_link(struct snd_card *card) { -#ifdef CONFIG_SYSFS_DEPRECATED - return card ? card->dev : NULL; -#else return card ? card->card_dev : NULL; -#endif } /* sound.c */ diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 0e4cfb694fe7..6fa7cbab7d93 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -5,7 +5,9 @@ #define _TRACE_IRQ_H #include <linux/tracepoint.h> -#include <linux/interrupt.h> + +struct irqaction; +struct softirq_action; #define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } #define show_softirq_name(val) \ @@ -93,7 +95,10 @@ DECLARE_EVENT_CLASS(softirq, ), TP_fast_assign( - __entry->vec = (int)(h - vec); + if (vec) + __entry->vec = (int)(h - vec); + else + __entry->vec = (int)(long)h; ), TP_printk("vec=%d [action=%s]", __entry->vec, @@ -136,6 +141,23 @@ DEFINE_EVENT(softirq, softirq_exit, TP_ARGS(h, vec) ); +/** + * softirq_raise - called immediately when a softirq is raised + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter contains a pointer to the softirq vector number which is + * raised. @vec is NULL and it means @h includes vector number not + * softirq_action. 
When used in combination with the softirq_entry tracepoint + * we can determine the softirq raise latency. + */ +DEFINE_EVENT(softirq, softirq_raise, + + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + + TP_ARGS(h, vec) +); + #endif /* _TRACE_IRQ_H */ /* This part must be outside protection */ diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index 188deca2f3c7..8fe1e93f531d 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -6,10 +6,31 @@ #include <linux/netdevice.h> #include <linux/tracepoint.h> +#include <linux/ftrace.h> + +#define NO_DEV "(no_device)" + +TRACE_EVENT(napi_poll, -DECLARE_TRACE(napi_poll, TP_PROTO(struct napi_struct *napi), - TP_ARGS(napi)); + + TP_ARGS(napi), + + TP_STRUCT__entry( + __field( struct napi_struct *, napi) + __string( dev_name, napi->dev ? napi->dev->name : NO_DEV) + ), + + TP_fast_assign( + __entry->napi = napi; + __assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV); + ), + + TP_printk("napi poll on napi struct %p for device %s", + __entry->napi, __get_str(dev_name)) +); + +#undef NO_DEV #endif /* _TRACE_NAPI_H_ */ diff --git a/include/trace/events/net.h b/include/trace/events/net.h new file mode 100644 index 000000000000..5f247f5ffc56 --- /dev/null +++ b/include/trace/events/net.h @@ -0,0 +1,82 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM net + +#if !defined(_TRACE_NET_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NET_H + +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/ip.h> +#include <linux/tracepoint.h> + +TRACE_EVENT(net_dev_xmit, + + TP_PROTO(struct sk_buff *skb, + int rc), + + TP_ARGS(skb, rc), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned int, len ) + __field( int, rc ) + __string( name, skb->dev->name ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + __entry->len = skb->len; + __entry->rc = rc; + __assign_str(name, skb->dev->name); + ), + + TP_printk("dev=%s skbaddr=%p len=%u rc=%d", + 
__get_str(name), __entry->skbaddr, __entry->len, __entry->rc) +); + +DECLARE_EVENT_CLASS(net_dev_template, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned int, len ) + __string( name, skb->dev->name ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + __entry->len = skb->len; + __assign_str(name, skb->dev->name); + ), + + TP_printk("dev=%s skbaddr=%p len=%u", + __get_str(name), __entry->skbaddr, __entry->len) +) + +DEFINE_EVENT(net_dev_template, net_dev_queue, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); + +DEFINE_EVENT(net_dev_template, netif_receive_skb, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); + +DEFINE_EVENT(net_dev_template, netif_rx, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); +#endif /* _TRACE_NET_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 35a2a6e7bf1e..286784d69b8f 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -10,12 +10,17 @@ #ifndef _TRACE_POWER_ENUM_ #define _TRACE_POWER_ENUM_ enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, + POWER_NONE = 0, + POWER_CSTATE = 1, /* C-State */ + POWER_PSTATE = 2, /* Fequency change or DVFS */ + POWER_SSTATE = 3, /* Suspend */ }; #endif +/* + * The power events are used for cpuidle & suspend (power_start, power_end) + * and for cpufreq (power_frequency) + */ DECLARE_EVENT_CLASS(power, TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), @@ -70,6 +75,85 @@ TRACE_EVENT(power_end, ); +/* + * The clock events are used for clock enable/disable and for + * clock rate change + */ +DECLARE_EVENT_CLASS(clock, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id), + + TP_STRUCT__entry( + __string( name, name ) + __field( u64, state ) + __field( u64, cpu_id ) + ), + + TP_fast_assign( + 
__assign_str(name, name); + __entry->state = state; + __entry->cpu_id = cpu_id; + ), + + TP_printk("%s state=%lu cpu_id=%lu", __get_str(name), + (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) +); + +DEFINE_EVENT(clock, clock_enable, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id) +); + +DEFINE_EVENT(clock, clock_disable, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id) +); + +DEFINE_EVENT(clock, clock_set_rate, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id) +); + +/* + * The power domain events are used for power domains transitions + */ +DECLARE_EVENT_CLASS(power_domain, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id), + + TP_STRUCT__entry( + __string( name, name ) + __field( u64, state ) + __field( u64, cpu_id ) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->state = state; + __entry->cpu_id = cpu_id; +), + + TP_printk("%s state=%lu cpu_id=%lu", __get_str(name), + (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) +); + +DEFINE_EVENT(power_domain, power_domain_target, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id) +); + #endif /* _TRACE_POWER_H */ /* This part must be outside protection */ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 9208c92aeab5..f6334782a593 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -362,6 +362,35 @@ TRACE_EVENT(sched_stat_runtime, (unsigned long long)__entry->vruntime) ); +/* + * Tracepoint for showing priority inheritance modifying a tasks + * priority. 
+ */ +TRACE_EVENT(sched_pi_setprio, + + TP_PROTO(struct task_struct *tsk, int newprio), + + TP_ARGS(tsk, newprio), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, oldprio ) + __field( int, newprio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->oldprio = tsk->prio; + __entry->newprio = newprio; + ), + + TP_printk("comm=%s pid=%d oldprio=%d newprio=%d", + __entry->comm, __entry->pid, + __entry->oldprio, __entry->newprio) +); + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 4b2be6dc76f0..75ce9d500d8e 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -35,6 +35,23 @@ TRACE_EVENT(kfree_skb, __entry->skbaddr, __entry->protocol, __entry->location) ); +TRACE_EVENT(consume_skb, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + ), + + TP_printk("skbaddr=%p", __entry->skbaddr) +); + TRACE_EVENT(skb_copy_datagram_iovec, TP_PROTO(const struct sk_buff *skb, int len), diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index 49682d7e9d60..7d497291c85d 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -7,38 +7,83 @@ #include <linux/tracepoint.h> #include <linux/workqueue.h> +DECLARE_EVENT_CLASS(workqueue_work, + + TP_PROTO(struct work_struct *work), + + TP_ARGS(work), + + TP_STRUCT__entry( + __field( void *, work ) + ), + + TP_fast_assign( + __entry->work = work; + ), + + TP_printk("work struct %p", __entry->work) +); + /** - * workqueue_execute_start - called immediately before the workqueue callback + * workqueue_queue_work - called when a work gets queued + * @req_cpu: the requested cpu + * @cwq: pointer to struct cpu_workqueue_struct * @work: pointer to struct 
work_struct * - * Allows to track workqueue execution. + * This event occurs when a work is queued immediately or once a + * delayed work is actually queued on a workqueue (ie: once the delay + * has been reached). */ -TRACE_EVENT(workqueue_execute_start, +TRACE_EVENT(workqueue_queue_work, - TP_PROTO(struct work_struct *work), + TP_PROTO(unsigned int req_cpu, struct cpu_workqueue_struct *cwq, + struct work_struct *work), - TP_ARGS(work), + TP_ARGS(req_cpu, cwq, work), TP_STRUCT__entry( __field( void *, work ) __field( void *, function) + __field( void *, workqueue) + __field( unsigned int, req_cpu ) + __field( unsigned int, cpu ) ), TP_fast_assign( __entry->work = work; __entry->function = work->func; + __entry->workqueue = cwq->wq; + __entry->req_cpu = req_cpu; + __entry->cpu = cwq->gcwq->cpu; ), - TP_printk("work struct %p: function %pf", __entry->work, __entry->function) + TP_printk("work struct=%p function=%pf workqueue=%p req_cpu=%u cpu=%u", + __entry->work, __entry->function, __entry->workqueue, + __entry->req_cpu, __entry->cpu) ); /** - * workqueue_execute_end - called immediately before the workqueue callback + * workqueue_activate_work - called when a work gets activated + * @work: pointer to struct work_struct + * + * This event occurs when a queued work is put on the active queue, + * which happens immediately after queueing unless @max_active limit + * is reached. + */ +DEFINE_EVENT(workqueue_work, workqueue_activate_work, + + TP_PROTO(struct work_struct *work), + + TP_ARGS(work) +); + +/** + * workqueue_execute_start - called immediately before the workqueue callback * @work: pointer to struct work_struct * * Allows to track workqueue execution. 
*/ -TRACE_EVENT(workqueue_execute_end, +TRACE_EVENT(workqueue_execute_start, TP_PROTO(struct work_struct *work), @@ -46,15 +91,29 @@ TRACE_EVENT(workqueue_execute_end, TP_STRUCT__entry( __field( void *, work ) + __field( void *, function) ), TP_fast_assign( __entry->work = work; + __entry->function = work->func; ), - TP_printk("work struct %p", __entry->work) + TP_printk("work struct %p: function %pf", __entry->work, __entry->function) ); +/** + * workqueue_execute_end - called immediately after the workqueue callback + * @work: pointer to struct work_struct + * + * Allows to track workqueue execution. + */ +DEFINE_EVENT(workqueue_work, workqueue_execute_end, + + TP_PROTO(struct work_struct *work), + + TP_ARGS(work) +); #endif /* _TRACE_WORKQUEUE_H */ |